From 3c161384cc215ea2acdf62046b13841e28119b5d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 4 Mar 2023 17:49:48 +0100 Subject: [PATCH 001/337] cos x GTOs integ added --- src/ao_basis/aos_in_r.irp.f | 26 +- src/ao_one_e_ints/NEED | 1 + src/ao_one_e_ints/ao_overlap.irp.f | 200 ++- src/ao_one_e_ints/kin_ao_ints.irp.f | 206 ++- src/ao_one_e_ints/pot_ao_erf_ints.irp.f | 531 +++--- src/ao_one_e_ints/pot_ao_ints.irp.f | 179 +- src/ao_two_e_ints/EZFIO.cfg | 7 - src/ao_two_e_ints/two_e_integrals.irp.f | 195 +- src/cosgtos_ao_int/EZFIO.cfg | 19 + src/cosgtos_ao_int/README.rst | 4 + src/cosgtos_ao_int/aos_cosgtos.irp.f | 210 +++ src/cosgtos_ao_int/cosgtos_ao_int.irp.f | 7 + .../gauss_legendre.irp.f | 0 src/cosgtos_ao_int/one_e_Coul_integrals.irp.f | 535 ++++++ src/cosgtos_ao_int/one_e_kin_integrals.irp.f | 223 +++ src/cosgtos_ao_int/two_e_Coul_integrals.irp.f | 1584 +++++++++++++++++ src/utils/cgtos_one_e.irp.f | 120 ++ src/utils/cgtos_utils.irp.f | 780 ++++++++ src/utils/cpx_erf.irp.f | 204 +++ src/utils/integration.irp.f | 135 +- src/utils/one_e_integration.irp.f | 72 +- src/utils/util.irp.f | 27 + 22 files changed, 4591 insertions(+), 674 deletions(-) create mode 100644 src/cosgtos_ao_int/EZFIO.cfg create mode 100644 src/cosgtos_ao_int/README.rst create mode 100644 src/cosgtos_ao_int/aos_cosgtos.irp.f create mode 100644 src/cosgtos_ao_int/cosgtos_ao_int.irp.f rename src/{ao_two_e_ints => cosgtos_ao_int}/gauss_legendre.irp.f (100%) create mode 100644 src/cosgtos_ao_int/one_e_Coul_integrals.irp.f create mode 100644 src/cosgtos_ao_int/one_e_kin_integrals.irp.f create mode 100644 src/cosgtos_ao_int/two_e_Coul_integrals.irp.f create mode 100644 src/utils/cgtos_one_e.irp.f create mode 100644 src/utils/cgtos_utils.irp.f create mode 100644 src/utils/cpx_erf.irp.f diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 902827eb..7fcb980a 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -12,21 +12,21 @@ double precision function ao_value(i,r) integer :: power_ao(3) double precision :: accu,dx,dy,dz,r2 num_ao = ao_nucl(i) -! power_ao(1:3)= ao_power(i,1:3) -! center_ao(1:3) = nucl_coord(num_ao,1:3) -! dx = (r(1) - center_ao(1)) -! dy = (r(2) - center_ao(2)) -! dz = (r(3) - center_ao(3)) -! r2 = dx*dx + dy*dy + dz*dz -! dx = dx**power_ao(1) -! dy = dy**power_ao(2) -! dz = dz**power_ao(3) + power_ao(1:3)= ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = (r(1) - center_ao(1)) + dy = (r(2) - center_ao(2)) + dz = (r(3) - center_ao(3)) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) accu = 0.d0 -! do m=1,ao_prim_num(i) -! beta = ao_expo_ordered_transp(m,i) -! accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) -! enddo + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) + enddo ao_value = accu * dx * dy * dz end diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index 61d23b1e..b9caaf5d 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,2 +1,3 @@ ao_basis pseudo +cosgtos_ao_int diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f index d9061d67..597eb32a 100644 --- a/src/ao_one_e_ints/ao_overlap.irp.f +++ b/src/ao_one_e_ints/ao_overlap.irp.f @@ -1,75 +1,99 @@ - BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap , (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_z, (ao_num, ao_num) ] + BEGIN_DOC -! Overlap between atomic basis functions: -! -! :math:`\int \chi_i(r) \chi_j(r) dr` + ! Overlap between atomic basis functions: + ! + ! :math:`\int \chi_i(r) \chi_j(r) dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_x, overlap_y, overlap_z double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) - ao_overlap = 0.d0 + + ao_overlap = 0.d0 ao_overlap_x = 0.d0 ao_overlap_y = 0.d0 ao_overlap_z = 0.d0 - if (read_ao_integrals_overlap) then - call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) - print *, 'AO overlap integrals read from disk' + + if(read_ao_integrals_overlap) then + + call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) + print *, 'AO overlap integrals read from disk' + else - dim1=100 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - ao_overlap(i,j) += c * overlap - if(isnan(ao_overlap(i,j)))then - print*,'i,j',i,j - print*,'l,n',l,n - print*,'c,overlap',c,overlap - print*,overlap_x,overlap_y,overlap_z - stop - endif - ao_overlap_x(i,j) += c * overlap_x - ao_overlap_y(i,j) += c * overlap_y - ao_overlap_z(i,j) += c * overlap_z + if(use_cosgtos) then + !print*, ' use_cosgtos for ao_overlap ?', use_cosgtos + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap (i,j) = ao_overlap_cosgtos (i,j) + ao_overlap_x(i,j) = ao_overlap_cosgtos_x(i,j) + ao_overlap_y(i,j) = ao_overlap_cosgtos_y(i,j) + ao_overlap_z(i,j) = ao_overlap_cosgtos_z(i,j) + enddo + enddo + + else + + dim1=100 + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_x,overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + ao_overlap(i,j) += c * overlap + if(isnan(ao_overlap(i,j)))then + print*,'i,j',i,j + print*,'l,n',l,n + print*,'c,overlap',c,overlap + print*,overlap_x,overlap_y,overlap_z + stop + endif + ao_overlap_x(i,j) += c * overlap_x + ao_overlap_y(i,j) += c * overlap_y + ao_overlap_z(i,j) += c * overlap_z + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif + endif + if (write_ao_integrals_overlap) then call ezfio_set_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) print *, 'AO overlap integrals written to disk' @@ -77,6 +101,8 @@ END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -85,6 +111,8 @@ BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] ao_overlap_imag = 0.d0 END_PROVIDER +! --- + BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -98,41 +126,43 @@ BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] enddo END_PROVIDER +! --- +BEGIN_PROVIDER [ double precision, ao_overlap_abs, (ao_num, ao_num) ] - -BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] - implicit none BEGIN_DOC -! Overlap between absolute values of atomic basis functions: -! -! :math:`\int |\chi_i(r)| |\chi_j(r)| dr` + ! Overlap between absolute values of atomic basis functions: + ! + ! :math:`\int |\chi_i(r)| |\chi_j(r)| dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 - double precision :: overlap, overlap_x, overlap_y, overlap_z + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: overlap_x, overlap_y, overlap_z double precision :: alpha, beta double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: lower_exp_val, dx - if (is_periodic) then - do j=1,ao_num - do i= 1,ao_num - ao_overlap_abs(i,j)= cdabs(ao_overlap_complex(i,j)) + + if(is_periodic) then + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap_abs(i,j) = cdabs(ao_overlap_complex(i,j)) enddo enddo + else + dim1=100 lower_exp_val = 40.d0 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B, & - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,dx) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& - !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B, & + !$OMP overlap_x,overlap_y, overlap_z, & + !$OMP alpha, beta,i,j,dx) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& + !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) do j=1,ao_num A_center(1) = nucl_coord( ao_nucl(j), 1 ) A_center(2) = nucl_coord( ao_nucl(j), 2 ) @@ -160,10 +190,14 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, S_inv,(ao_num,ao_num) ] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f index 4f117deb..a5ee0670 100644 --- a/src/ao_one_e_ints/kin_ao_ints.irp.f +++ b/src/ao_one_e_ints/kin_ao_ints.irp.f @@ -1,7 +1,10 @@ - BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_z, (ao_num, ao_num) ] + BEGIN_DOC ! Second derivative matrix elements in the |AO| basis. ! @@ -11,114 +14,131 @@ ! \langle \chi_i(x,y,z) | \frac{\partial^2}{\partial x^2} |\chi_j (x,y,z) \rangle ! END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_y, overlap_z double precision :: overlap_x0, overlap_y0, overlap_z0 double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: d_a_2,d_2 - dim1=100 - ! -- Dummy call to provide everything - A_center(:) = 0.d0 - B_center(:) = 1.d0 - alpha = 1.d0 - beta = .1d0 - power_A = 1 - power_B = 0 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - ! -- + if(use_cosgtos) then + !print*, 'use_cosgtos for ao_kinetic_integrals ?', use_cosgtos - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & - !$OMP overlap_x0,overlap_y0,overlap_z0) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - ao_deriv2_x(i,j)= 0.d0 - ao_deriv2_y(i,j)= 0.d0 - ao_deriv2_z(i,j)= 0.d0 - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + do j = 1, ao_num + do i = 1, ao_num + ao_deriv2_x(i,j) = ao_deriv2_cosgtos_x(i,j) + ao_deriv2_y(i,j) = ao_deriv2_cosgtos_y(i,j) + ao_deriv2_z(i,j) = ao_deriv2_cosgtos_z(i,j) + enddo + enddo - power_A(1) = power_A(1)-2 - if (power_A(1)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(1) = power_A(1)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) - power_A(1) = power_A(1)-2 + else - double precision :: deriv_tmp - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & - +power_A(1) * (power_A(1)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + dim1=100 - ao_deriv2_x(i,j) += c*deriv_tmp - power_A(2) = power_A(2)-2 - if (power_A(2)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(2) = power_A(2)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) - power_A(2) = power_A(2)-2 + ! -- Dummy call to provide everything + A_center(:) = 0.d0 + B_center(:) = 1.d0 + alpha = 1.d0 + beta = .1d0 + power_A = 1 + power_B = 0 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + ! -- - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * overlap_y0 & - +power_A(2) * (power_A(2)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 - ao_deriv2_y(i,j) += c*deriv_tmp + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & + !$OMP overlap_x0,overlap_y0,overlap_z0) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + ao_deriv2_x(i,j)= 0.d0 + ao_deriv2_y(i,j)= 0.d0 + ao_deriv2_z(i,j)= 0.d0 + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - power_A(3) = power_A(3)-2 - if (power_A(3)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(3) = power_A(3)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) - power_A(3) = power_A(3)-2 + power_A(1) = power_A(1)-2 + if (power_A(1)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(1) = power_A(1)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) + power_A(1) = power_A(1)-2 - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & - +power_A(3) * (power_A(3)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 - ao_deriv2_z(i,j) += c*deriv_tmp + double precision :: deriv_tmp + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & + +power_A(1) * (power_A(1)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + ao_deriv2_x(i,j) += c*deriv_tmp + power_A(2) = power_A(2)-2 + if (power_A(2)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(2) = power_A(2)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) + power_A(2) = power_A(2)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * overlap_y0 & + +power_A(2) * (power_A(2)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 + ao_deriv2_y(i,j) += c*deriv_tmp + + power_A(3) = power_A(3)-2 + if (power_A(3)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(3) = power_A(3)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) + power_A(3) = power_A(3)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & + +power_A(3) * (power_A(3)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 + ao_deriv2_z(i,j) += c*deriv_tmp + + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_kinetic_integrals, (ao_num,ao_num)] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index c4a573be..dddf98d4 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -1,3 +1,6 @@ + +! --- + subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) implicit none BEGIN_DOC @@ -15,36 +18,104 @@ subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) enddo end +! --- + +double precision function NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) -double precision function NAI_pol_mult_erf_ao(i_ao,j_ao,mu_in,C_center) - implicit none BEGIN_DOC + ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! END_DOC - integer, intent(in) :: i_ao,j_ao + + implicit none + integer, intent(in) :: i_ao, j_ao double precision, intent(in) :: mu_in, C_center(3) - integer :: i,j,num_A,num_B, power_A(3), power_B(3), n_pt_in - double precision :: A_center(3), B_center(3),integral, alpha,beta + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in + double precision :: A_center(3), B_center(3), integral, alpha, beta + double precision :: NAI_pol_mult_erf - num_A = ao_nucl(i_ao) - power_A(1:3)= ao_power(i_ao,1:3) + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(j_ao) - power_B(1:3)= ao_power(j_ao,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) B_center(1:3) = nucl_coord(num_B,1:3) + n_pt_in = n_pt_max_integrals + NAI_pol_mult_erf_ao = 0.d0 do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do j = 1, ao_prim_num(j_ao) beta = ao_expo_ordered_transp(j,j_ao) - integral = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in) - NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao) + + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in,mu_in) + + NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) enddo enddo -end +end function NAI_pol_mult_erf_ao + +! --- + +double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + implicit none + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3) + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, power_A1(3), power_A2(3), n_pt_in + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + + double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + return + endif + + power_A1(1:3) = ao_power(i_ao,1:3) + power_A2(1:3) = ao_power(j_ao,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + NAI_pol_mult_erf_ao_with1s = 0.d0 + do i = 1, ao_prim_num(i_ao) + alpha1 = ao_expo_ordered_transp (i,i_ao) + coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alpha2 = ao_expo_ordered_transp(j,j_ao) + coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) + if(dabs(coef12) .lt. 1d-14) cycle + + integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + NAI_pol_mult_erf_ao_with1s += integral * coef12 + enddo + enddo + +end function NAI_pol_mult_erf_ao_with1s + +! --- double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) @@ -127,58 +198,221 @@ end function NAI_pol_mult_erf ! --- - -double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + ! .. math:: + ! + ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. ! END_DOC + include 'utils/constants.include.F' + implicit none - integer, intent(in) :: i_ao, j_ao - double precision, intent(in) :: beta, B_center(3) - double precision, intent(in) :: mu_in, C_center(3) - integer :: i, j, power_A1(3), power_A2(3), n_pt_in - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - ASSERT(beta .ge. 0.d0) - if(beta .lt. 1d-10) then - NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + double precision :: rint + + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + enddo + + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + res_v(ipoint) = coeff * rint(0, const) + enddo + + else + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff + enddo + + endif + +end subroutine NAI_pol_mult_erf_v + +! --- + +double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math:: + ! + ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) + ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) + ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) + ! \exp(-\beta (r - B)^2) + ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + + integer :: i, n_pt, n_pt_out + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new + + double precision :: rint + + + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} + alpha12 = alpha1 + alpha2 + alpha12_inv = 1.d0 / alpha12 + alpha12_inv_2 = 0.5d0 * alpha12_inv + rho12 = alpha1 * alpha2 * alpha12_inv + A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv + A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv + A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv + dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + const_factor12 = dist12 * rho12 + if(const_factor12 > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 return endif - power_A1(1:3) = ao_power(i_ao,1:3) - power_A2(1:3) = ao_power(j_ao,1:3) + ! --- - A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) - A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv + dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & + + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & + + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - n_pt_in = n_pt_max_integrals + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif - NAI_pol_mult_erf_ao_with1s = 0.d0 - do i = 1, ao_prim_num(i_ao) - alpha1 = ao_expo_ordered_transp (i,i_ao) - coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & + + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & + + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - do j = 1, ao_prim_num(j_ao) - alpha2 = ao_expo_ordered_transp(j,j_ao) - coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) - if(dabs(coef12) .lt. 1d-14) cycle + ! --- - integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) + p_new = mu_in / dsqrt(p + mu_in * mu_in) + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - NAI_pol_mult_erf_ao_with1s += integral * coef12 - enddo + n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) + const = p * dist_integral * p_new * p_new + if(n_pt == 0) then + NAI_pol_mult_erf_with1s = coeff * rint(0, const) + return + endif + + do i = 0, n_pt_in + d(i) = 0.d0 enddo + p_new = p_new * p_new + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) -end function NAI_pol_mult_erf_ao_with1s + if(n_pt_out < 0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + NAI_pol_mult_erf_with1s = accu * coeff + +end function NAI_pol_mult_erf_with1s + +! --- subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) @@ -428,107 +662,6 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) - ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. - ! - END_DOC - - include 'utils/constants.include.F' - - implicit none - - integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(in) :: C_center(LD_C,3) - double precision, intent(out) :: res_v(LD_resv) - - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new, p_new2, coef_tmp - - double precision :: rint - - res_V(1:LD_resv) = 0.d0 - - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - coef_tmp = p * p_new2 - - dist = 0.d0 - do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) - enddo - - const_factor = dist * rho - if(const_factor > 80.d0) then - return - endif - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - - if(n_pt == 0) then - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - res_v(ipoint) = coeff * rint(0, const) - enddo - - else - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) - - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - - res_v(ipoint) = accu * coeff - enddo - - endif - -end subroutine NAI_pol_mult_erf_v - subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) @@ -659,113 +792,3 @@ subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,po end -double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) - ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) - ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) - ! \exp(-\beta (r - B)^2) - ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. - ! - END_DOC - - include 'utils/constants.include.F' - - implicit none - integer, intent(in) :: n_pt_in - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - - integer :: i, n_pt, n_pt_out - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new - - double precision :: rint - - - ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} - alpha12 = alpha1 + alpha2 - alpha12_inv = 1.d0 / alpha12 - alpha12_inv_2 = 0.5d0 * alpha12_inv - rho12 = alpha1 * alpha2 * alpha12_inv - A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv - A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv - A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv - dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) - - const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! --- - - ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv - dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & - + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & - + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & - + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & - + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - - ! --- - - p_new = mu_in / dsqrt(p + mu_in * mu_in) - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then - NAI_pol_mult_erf_with1s = coeff * rint(0, const) - return - endif - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - - if(n_pt_out < 0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - NAI_pol_mult_erf_with1s = accu * coeff - -end function NAI_pol_mult_erf_with1s diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 1d92dc7d..928053ad 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -1,4 +1,8 @@ + +! --- + BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] + BEGIN_DOC ! Nucleus-electron interaction, in the |AO| basis set. ! @@ -6,84 +10,100 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] ! ! These integrals also contain the pseudopotential integrals. END_DOC + implicit none - double precision :: alpha, beta, gama, delta - integer :: num_A,num_B - double precision :: A_center(3),B_center(3),C_center(3) - integer :: power_A(3),power_B(3) - integer :: i,j,k,l,n_pt_in,m - double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: alpha, beta + double precision :: A_center(3),B_center(3),C_center(3) + double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + + ao_integrals_n_e = 0.d0 if (read_ao_integrals_n_e) then + call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e) print *, 'AO N-e integrals read from disk' + else - ao_integrals_n_e = 0.d0 + if(use_cosgtos) then + !print *, " use_cosgtos for ao_integrals_n_e ?", use_cosgtos - ! _ - ! /| / |_) - ! | / | \ - ! + do j = 1, ao_num + do i = 1, ao_num + ao_integrals_n_e(i,j) = ao_integrals_n_e_cosgtos(i,j) + enddo + enddo - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& - !$OMP num_A,num_B,Z,c,n_pt_in) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& - !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) + else - n_pt_in = n_pt_max_integrals + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,c1,n_pt_in) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) - !$OMP DO SCHEDULE (dynamic) + n_pt_in = n_pt_max_integrals - do j = 1, ao_num - num_A = ao_nucl(j) - power_A(1:3)= ao_power(j,1:3) - A_center(1:3) = nucl_coord(num_A,1:3) + !$OMP DO SCHEDULE (dynamic) - do i = 1, ao_num + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(i) - power_B(1:3)= ao_power(i,1:3) - B_center(1:3) = nucl_coord(num_B,1:3) + do i = 1, ao_num - do l=1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(l,j) + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) - double precision :: c - c = 0.d0 + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) - do k = 1, nucl_num - double precision :: Z - Z = nucl_charge(k) + double precision :: c, c1 + c = 0.d0 - C_center(1:3) = nucl_coord(k,1:3) + do k = 1, nucl_num + double precision :: Z + Z = nucl_charge(k) - c = c - Z * NAI_pol_mult(A_center,B_center, & - power_A,power_B,alpha,beta,C_center,n_pt_in) + C_center(1:3) = nucl_coord(k,1:3) + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, alpha, beta + + c1 = NAI_pol_mult( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !print *, ' c1 = ', c1 + + c = c - Z * c1 + + enddo + ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & + + ao_coef_normalized_ordered_transp(l,j) & + * ao_coef_normalized_ordered_transp(m,i) * c enddo - ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & - + ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) * c enddo enddo enddo - enddo !$OMP END DO !$OMP END PARALLEL - IF (DO_PSEUDO) THEN + + endif + + + IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF - IF(point_charges) THEN - ao_integrals_n_e += ao_integrals_pt_chrg - ENDIF - endif @@ -102,7 +122,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_imag, (ao_num,ao_num)] ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -125,7 +145,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_per_atom, (ao_num,ao_num,nuc ! :math:`\langle \chi_i | -\frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: i_c,num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -268,6 +288,7 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b end +! --- subroutine give_polynomial_mult_center_one_e(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out) implicit none @@ -579,61 +600,3 @@ double precision function V_r(n,alpha) end -double precision function V_phi(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\phi$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. - END_DOC - integer :: n,m, i - double precision :: prod, Wallis - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_phi = 4.d0 * prod * Wallis(m) -end - - -double precision function V_theta(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\theta$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ - END_DOC - integer :: n,m,i - double precision :: Wallis, prod - include 'utils/constants.include.F' - V_theta = 0.d0 - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_theta = (prod+prod) * Wallis(m) -end - - -double precision function Wallis(n) - implicit none - BEGIN_DOC - ! Wallis integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. - END_DOC - double precision :: fact - integer :: n,p - include 'utils/constants.include.F' - if(iand(n,1).eq.0)then - Wallis = fact(shiftr(n,1)) - Wallis = pi * fact(n) / (dble(ibset(0_8,n)) * (Wallis+Wallis)*Wallis) - else - p = shiftr(n,1) - Wallis = fact(p) - Wallis = dble(ibset(0_8,p+p)) * Wallis*Wallis / fact(p+p+1) - endif - -end - - diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index b18c65d1..dfceddb5 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,13 +4,6 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 83fbadfd..82ffbc90 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -1,102 +1,123 @@ -double precision function ao_two_e_integral(i,j,k,l) - implicit none + +! --- + +double precision function ao_two_e_integral(i, j, k, l) + BEGIN_DOC ! integral of the AO basis or (ij|kl) ! i(r1) j(r1) 1/r12 k(r2) l(r2) END_DOC - integer,intent(in) :: i,j,k,l - integer :: p,q,r,s - double precision :: I_center(3),J_center(3),K_center(3),L_center(3) - integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) - double precision :: integral + implicit none include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) + integer :: iorder_p(3), iorder_q(3) + double precision :: I_center(3), J_center(3), K_center(3), L_center(3) + double precision :: integral double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq - integer :: iorder_p(3), iorder_q(3) + double precision :: ao_two_e_integral_schwartz_accel - if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then - ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) - else + double precision :: ao_two_e_integral_cosgtos - dim1 = n_pt_max_integrals - num_i = ao_nucl(i) - num_j = ao_nucl(j) - num_k = ao_nucl(k) - num_l = ao_nucl(l) - ao_two_e_integral = 0.d0 + if(use_cosgtos) then + !print *, ' use_cosgtos for ao_two_e_integral ?', use_cosgtos - if (num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k)then - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - I_center(p) = nucl_coord(num_i,p) - J_center(p) = nucl_coord(num_j,p) - K_center(p) = nucl_coord(num_k,p) - L_center(p) = nucl_coord(num_l,p) - enddo + ao_two_e_integral = ao_two_e_integral_cosgtos(i, j, k, l) - double precision :: coef1, coef2, coef3, coef4 - double precision :: p_inv,q_inv - double precision :: general_primitive_integral + else - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & - I_power,J_power,I_center,J_center,dim1) - p_inv = 1.d0/pp - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& - ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & - K_power,L_power,K_center,L_center,dim1) - q_inv = 1.d0/qq - integral = general_primitive_integral(dim1, & - P_new,P_center,fact_p,pp,p_inv,iorder_p, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! p + if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then + + ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) else - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - enddo - double precision :: ERI + dim1 = n_pt_max_integrals - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - integral = ERI( & - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& - I_power(1),J_power(1),K_power(1),L_power(1), & - I_power(2),J_power(2),K_power(2),L_power(2), & - I_power(3),J_power(3),K_power(3),L_power(3)) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! p + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + ao_two_e_integral = 0.d0 + + if (num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k)then + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) + J_center(p) = nucl_coord(num_j,p) + K_center(p) = nucl_coord(num_k,p) + L_center(p) = nucl_coord(num_l,p) + enddo + + double precision :: coef1, coef2, coef3, coef4 + double precision :: p_inv,q_inv + double precision :: general_primitive_integral + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & + I_power,J_power,I_center,J_center,dim1) + p_inv = 1.d0/pp + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& + ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & + K_power,L_power,K_center,L_center,dim1) + q_inv = 1.d0/qq + integral = general_primitive_integral(dim1, & + P_new,P_center,fact_p,pp,p_inv,iorder_p, & + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + double precision :: ERI + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + integral = ERI( & + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& + I_power(1),J_power(1),K_power(1),L_power(1), & + I_power(2),J_power(2),K_power(2),L_power(2), & + I_power(3),J_power(3),K_power(3),L_power(3)) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif endif @@ -104,6 +125,8 @@ double precision function ao_two_e_integral(i,j,k,l) end +! --- + double precision function ao_two_e_integral_schwartz_accel(i,j,k,l) implicit none BEGIN_DOC @@ -421,14 +444,17 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] - implicit none +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) ] + BEGIN_DOC ! Needed to compute Schwartz inequalities END_DOC - integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + implicit none + integer :: i, k + double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 ao_two_e_integral_schwartz(1,1) = ao_two_e_integral(1,1,1,1) !$OMP PARALLEL DO PRIVATE(i,k) & @@ -445,6 +471,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] END_PROVIDER +! --- double precision function general_primitive_integral(dim, & P_new,P_center,fact_p,p,p_inv,iorder_p, & diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg new file mode 100644 index 00000000..8edeecd0 --- /dev/null +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -0,0 +1,19 @@ +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider,ocaml +default: False + +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst new file mode 100644 index 00000000..01f25d6d --- /dev/null +++ b/src/cosgtos_ao_int/README.rst @@ -0,0 +1,4 @@ +============== +cosgtos_ao_int +============== + diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/cosgtos_ao_int/aos_cosgtos.irp.f new file mode 100644 index 00000000..6a4d54fd --- /dev/null +++ b/src/cosgtos_ao_int/aos_cosgtos.irp.f @@ -0,0 +1,210 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_coef_norm_ord_transp_cosgtos(i,j) = ao_coef_norm_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ complex*16, ao_expo_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_expo_ord_transp_cosgtos(i,j) = ao_expo_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + + integer :: i, j, powA(3), nz + double precision :: norm + complex*16 :: overlap_x, overlap_y, overlap_z, C_A(3) + complex*16 :: integ1, integ2, expo + + nz = 100 + + C_A(1) = (0.d0, 0.d0) + C_A(2) = (0.d0, 0.d0) + C_A(3) = (0.d0, 0.d0) + + ao_coef_norm_cosgtos = 0.d0 + + do i = 1, ao_num + + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + + ! Normalization of the primitives + if(primitives_normalized) then + + do j = 1, ao_prim_num(i) + + expo = ao_expo(i,j) + (0.d0, 1.d0) * ao_expoim_cosgtos(i,j) + + call overlap_cgaussian_xyz(C_A, C_A, expo, expo, powA, powA, overlap_x, overlap_y, overlap_z, integ1, nz) + call overlap_cgaussian_xyz(C_A, C_A, conjg(expo), expo, powA, powA, overlap_x, overlap_y, overlap_z, integ2, nz) + + norm = 2.d0 * real( integ1 + integ2 ) + + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) / dsqrt(norm) + enddo + + else + + do j = 1, ao_prim_num(i) + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) + enddo + + endif + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_cosgtos, (ao_num, ao_prim_num_max) ] +&BEGIN_PROVIDER [ complex*16 , ao_expo_ord_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + integer :: i, j + integer :: iorder(ao_prim_num_max) + double precision :: d(ao_prim_num_max,3) + + d = 0.d0 + + do i = 1, ao_num + + do j = 1, ao_prim_num(i) + iorder(j) = j + d(j,1) = ao_expo(i,j) + d(j,2) = ao_coef_norm_cosgtos(i,j) + d(j,3) = ao_expoim_cosgtos(i,j) + enddo + + call dsort (d(1,1), iorder, ao_prim_num(i)) + call dset_order(d(1,2), iorder, ao_prim_num(i)) + call dset_order(d(1,3), iorder, ao_prim_num(i)) + + do j = 1, ao_prim_num(i) + ao_expo_ord_cosgtos (i,j) = d(j,1) + (0.d0, 1.d0) * d(j,3) + ao_coef_norm_ord_cosgtos(i,j) = d(j,2) + enddo + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, overlap, overlap_x, overlap_y, overlap_z + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap1, overlap_x1, overlap_y1, overlap_z1 + complex*16 :: overlap2, overlap_x2, overlap_y2, overlap_z2 + + ao_overlap_cosgtos = 0.d0 + ao_overlap_cosgtos_x = 0.d0 + ao_overlap_cosgtos_y = 0.d0 + ao_overlap_cosgtos_z = 0.d0 + + dim1 = 100 + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, n, l, c & + !$OMP , overlap_x , overlap_y , overlap_z , overlap & + !$OMP , overlap_x1, overlap_y1, overlap_z1, overlap1 & + !$OMP , overlap_x2, overlap_y2, overlap_z2, overlap2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_overlap_cosgtos_x, ao_overlap_cosgtos_y, ao_overlap_cosgtos_z, ao_overlap_cosgtos & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos ) + + do j = 1, ao_num + + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x1, overlap_y1, overlap_z1, overlap1, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, conjg(alpha), beta, power_A, power_B & + , overlap_x2, overlap_y2, overlap_z2, overlap2, dim1 ) + + overlap_x = 2.d0 * real( overlap_x1 + overlap_x2 ) + overlap_y = 2.d0 * real( overlap_y1 + overlap_y2 ) + overlap_z = 2.d0 * real( overlap_z1 + overlap_z2 ) + overlap = 2.d0 * real( overlap1 + overlap2 ) + + ao_overlap_cosgtos(i,j) = ao_overlap_cosgtos(i,j) + c * overlap + + if( isnan(ao_overlap_cosgtos(i,j)) ) then + print*,'i, j', i, j + print*,'l, n', l, n + print*,'c, overlap', c, overlap + print*, overlap_x, overlap_y, overlap_z + stop + endif + + ao_overlap_cosgtos_x(i,j) = ao_overlap_cosgtos_x(i,j) + c * overlap_x + ao_overlap_cosgtos_y(i,j) = ao_overlap_cosgtos_y(i,j) + c * overlap_y + ao_overlap_cosgtos_z(i,j) = ao_overlap_cosgtos_z(i,j) + c * overlap_z + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + + diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f new file mode 100644 index 00000000..d65dfba5 --- /dev/null +++ b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f @@ -0,0 +1,7 @@ +program cosgtos_ao_int + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/src/ao_two_e_ints/gauss_legendre.irp.f b/src/cosgtos_ao_int/gauss_legendre.irp.f similarity index 100% rename from src/ao_two_e_ints/gauss_legendre.irp.f rename to src/cosgtos_ao_int/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f new file mode 100644 index 00000000..7f94f226 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f @@ -0,0 +1,535 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_integrals_n_e_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Nucleus-electron interaction, in the cosgtos |AO| basis set. + ! + ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` + ! + END_DOC + + implicit none + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: c, Z, A_center(3), B_center(3), C_center(3) + complex*16 :: alpha, beta, c1, c2 + + complex*16 :: NAI_pol_mult_cosgtos + + ao_integrals_n_e_cosgtos = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE ( i, j, k, l, m, alpha, beta, A_center, B_center, C_center & + !$OMP , power_A, power_B, num_A, num_B, Z, c, c1, c2, n_pt_in ) & + !$OMP SHARED ( ao_num, ao_prim_num, ao_nucl, nucl_coord, ao_power, nucl_num, nucl_charge & + !$OMP , ao_expo_ord_transp_cosgtos, ao_coef_norm_ord_transp_cosgtos & + !$OMP , n_pt_max_integrals, ao_integrals_n_e_cosgtos ) + + n_pt_in = n_pt_max_integrals + + !$OMP DO SCHEDULE (dynamic) + + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3) = ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + num_B = ao_nucl(i) + power_B(1:3) = ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(l,j) + + do m = 1, ao_prim_num(i) + beta = ao_expo_ord_transp_cosgtos(m,i) + + c = 0.d0 + do k = 1, nucl_num + + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, real(alpha), real(beta) + + c1 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !c2 = c1 + c2 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , conjg(alpha), beta, C_center, n_pt_in ) + + !print *, ' c1 = ', real(c1) + !print *, ' c2 = ', real(c2) + + c = c - Z * 2.d0 * real(c1 + c2) + + enddo + ao_integrals_n_e_cosgtos(i,j) = ao_integrals_n_e_cosgtos(i,j) & + + ao_coef_norm_ord_transp_cosgtos(l,j) & + * ao_coef_norm_ord_transp_cosgtos(m,i) * c + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! --- + +complex*16 function NAI_pol_mult_cosgtos(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in) + + BEGIN_DOC + ! + ! Computes the electron-nucleus attraction with two primitves cosgtos. + ! + ! :math:`\langle g_i | \frac{1}{|r-R_c|} | g_j \rangle` + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: C_center(3), A_center(3), B_center(3) + complex*16, intent(in) :: alpha, beta + + integer :: i, n_pt, n_pt_out + double precision :: dist, const_mod + complex*16 :: p, p_inv, rho, dist_integral, const, const_factor, coeff, factor + complex*16 :: accu, P_center(3) + complex*16 :: d(0:n_pt_in) + + complex*16 :: V_n_e_cosgtos + complex*16 :: crint + + if ( (A_center(1)/=B_center(1)) .or. (A_center(2)/=B_center(2)) .or. (A_center(3)/=B_center(3)) .or. & + (A_center(1)/=C_center(1)) .or. (A_center(2)/=C_center(2)) .or. (A_center(3)/=C_center(3)) ) then + + continue + + else + + NAI_pol_mult_cosgtos = V_n_e_cosgtos( power_A(1), power_A(2), power_A(3) & + , power_B(1), power_B(2), power_B(3) & + , alpha, beta ) + return + + endif + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + rho = alpha * beta * p_inv + + dist = 0.d0 + dist_integral = (0.d0, 0.d0) + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i)) + enddo + + const_factor = dist * rho + const = p * dist_integral + + const_mod = dsqrt(real(const_factor)*real(const_factor) + aimag(const_factor)*aimag(const_factor)) + if(const_mod > 80.d0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + factor = zexp(-const_factor) + coeff = dtwo_pi * factor * p_inv + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt = 2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) ) + if(n_pt == 0) then + NAI_pol_mult_cosgtos = coeff * crint(0, const) + return + endif + + call give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + if(n_pt_out < 0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + accu = (0.d0, 0.d0) + do i = 0, n_pt_out, 2 + accu += crint(shiftr(i, 1), const) * d(i) + +! print *, shiftr(i, 1), real(const), real(d(i)), real(crint(shiftr(i, 1), const)) + enddo + NAI_pol_mult_cosgtos = accu * coeff + +end function NAI_pol_mult_cosgtos + +! --- + +subroutine give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + BEGIN_DOC + ! Returns the explicit polynomial in terms of the "t" variable of the following + ! + ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. + END_DOC + + implicit none + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), C_center(3) + complex*16, intent(in) :: alpha, beta + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:n_pt_in) + + integer :: a_x, b_x, a_y, b_y, a_z, b_z + integer :: n_pt1, n_pt2, n_pt3, dim, i, n_pt_tmp + complex*16 :: p, P_center(3), rho, p_inv, p_inv_2 + complex*16 :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2) + complex*16 :: d1(0:n_pt_in), d2(0:n_pt_in), d3(0:n_pt_in) + + ASSERT (n_pt_in > 1) + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + p_inv_2 = 0.5d0 * p_inv + + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + enddo + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + d1(i) = (0.d0, 0.d0) + d2(i) = (0.d0, 0.d0) + d3(i) = (0.d0, 0.d0) + enddo + + ! --- + + n_pt1 = n_pt_in + + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(1) - C_center(1)) + + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(1) - C_center(1)) + + R2x(0) = p_inv_2 + R2x(1) = (0.d0, 0.d0) + R2x(2) = -p_inv_2 + + a_x = power_A(1) + b_x = power_B(1) + call I_x1_pol_mult_one_e_cosgtos(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in) + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt2 = n_pt_in + + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(2) - C_center(2)) + + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(2) - C_center(2)) + + a_y = power_A(2) + b_y = power_B(2) + call I_x1_pol_mult_one_e_cosgtos(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in) + + if(n_pt2 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt3 = n_pt_in + + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(3) - C_center(3)) + + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(3) - C_center(3)) + + a_z = power_A(3) + b_z = power_B(3) + call I_x1_pol_mult_one_e_cosgtos(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in) + + if(n_pt3 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt_tmp = 0 + call multiply_cpoly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp) + do i = 0, n_pt_tmp + d1(i) = (0.d0, 0.d0) + enddo + + n_pt_out = 0 + call multiply_cpoly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out) + do i = 0, n_pt_out + d(i) = d1(i) + enddo + +end subroutine give_cpolynomial_mult_center_one_e + +! --- + +recursive subroutine I_x1_pol_mult_one_e_cosgtos(a, c, R1x, R1xp, R2x, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt_in + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:n_pt_in) + + integer :: nx, ix, dim, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + dim = n_pt_in + + if( (a==0) .and. (c==0)) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif( (c < 0) .or. (nd < 0) ) then + + nd = -1 + return + + elseif((a == 0) .and. (c .ne. 0)) then + + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, n_pt_in) + + elseif(a == 1) then + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x2_pol_mult_one_e_cosgtos(c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + else + + nx = 0 + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-2, c, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-1, c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x1_pol_mult_one_e_cosgtos(a-1, c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + endif + +end subroutine I_x1_pol_mult_one_e_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, dim) + + BEGIN_DOC + ! Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(out) :: d(0:max_dim) + + integer :: i, nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + if(c == 0) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif((nd < 0) .or. (c < 0)) then + + nd = -1 + return + + else + + nx = 0 + do ix = 0, dim + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-2, R1x, R1xp, R2x, X, nx, dim) + + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + do ix = 0, dim + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-1, R1x, R1xp, R2x, Y, ny, dim) + + if(ny .ge. 0) then + call multiply_cpoly(Y, ny, R1xp, 2, d, nd) + endif + + endif + +end subroutine I_x2_pol_mult_one_e_cosgtos + +! --- + +complex*16 function V_n_e_cosgtos(a_x, a_y, a_z, b_x, b_y, b_z, alpha, beta) + + BEGIN_DOC + ! Primitve nuclear attraction between the two primitves centered on the same atom. + ! + ! $p_1 = x^{a_x} y^{a_y} z^{a_z} \exp(-\alpha r^2)$ + ! + ! $p_2 = x^{b_x} y^{b_y} z^{b_z} \exp(-\beta r^2)$ + END_DOC + + implicit none + + integer, intent(in) :: a_x, a_y, a_z, b_x, b_y, b_z + complex*16, intent(in) :: alpha, beta + + double precision :: V_phi, V_theta + complex*16 :: V_r_cosgtos + + if( (iand(a_x + b_x, 1) == 1) .or. & + (iand(a_y + b_y, 1) == 1) .or. & + (iand(a_z + b_z, 1) == 1) ) then + + V_n_e_cosgtos = (0.d0, 0.d0) + + else + + V_n_e_cosgtos = V_r_cosgtos(a_x + b_x + a_y + b_y + a_z + b_z + 1, alpha + beta) & + * V_phi(a_x + b_x, a_y + b_y) & + * V_theta(a_z + b_z, a_x + b_x + a_y + b_y + 1) + endif + +end function V_n_e_cosgtos + +! --- + +complex*16 function V_r_cosgtos(n, alpha) + + BEGIN_DOC + ! Computes the radial part of the nuclear attraction integral: + ! + ! $\int_{0}^{\infty} r^n \exp(-\alpha r^2) dr$ + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer , intent(in) :: n + complex*16, intent(in) :: alpha + + double precision :: fact + + if(iand(n, 1) .eq. 1) then + V_r_cosgtos = 0.5d0 * fact(shiftr(n, 1)) / (alpha**(shiftr(n, 1) + 1)) + else + V_r_cosgtos = sqpi * fact(n) / fact(shiftr(n, 1)) * (0.5d0/zsqrt(alpha))**(n+1) + endif + +end function V_r_cosgtos + +! --- + diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f new file mode 100644 index 00000000..710b04d4 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f @@ -0,0 +1,223 @@ + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, deriv_tmp + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap_x, overlap_y, overlap_z, overlap + complex*16 :: overlap_x0_1, overlap_y0_1, overlap_z0_1 + complex*16 :: overlap_x0_2, overlap_y0_2, overlap_z0_2 + complex*16 :: overlap_m2_1, overlap_p2_1 + complex*16 :: overlap_m2_2, overlap_p2_2 + complex*16 :: deriv_tmp_1, deriv_tmp_2 + + + dim1 = 100 + + ! -- Dummy call to provide everything + + A_center(:) = (0.0d0, 0.d0) + B_center(:) = (1.0d0, 0.d0) + alpha = (1.0d0, 0.d0) + beta = (0.1d0, 0.d0) + power_A = 1 + power_B = 0 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + ! --- + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, l, n, c & + !$OMP , deriv_tmp, deriv_tmp_1, deriv_tmp_2 & + !$OMP , overlap_x, overlap_y, overlap_z, overlap & + !$OMP , overlap_m2_1, overlap_p2_1, overlap_m2_2, overlap_p2_2 & + !$OMP , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap_x0_2, overlap_y0_2, overlap_z0_2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos & + !$OMP , ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z ) + + do j = 1, ao_num + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + ao_deriv2_cosgtos_x(i,j) = 0.d0 + ao_deriv2_cosgtos_y(i,j) = 0.d0 + ao_deriv2_cosgtos_z(i,j) = 0.d0 + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x0_2, overlap_y0_2, overlap_z0_2, overlap, dim1 ) + + ! --- + + power_A(1) = power_A(1) - 2 + if(power_A(1) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_m2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_m2_2, overlap_y, overlap_z, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(1) = power_A(1) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_p2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_p2_2, overlap_y, overlap_z, overlap, dim1 ) + + power_A(1) = power_A(1) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_1 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_y0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_2 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_y0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_x(i,j) += c * deriv_tmp + + ! --- + + power_A(2) = power_A(2) - 2 + if(power_A(2) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_m2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_m2_2, overlap_y, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(2) = power_A(2) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_p2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_p2_2, overlap_y, overlap, dim1 ) + + power_A(2) = power_A(2) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_1 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_2 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_y(i,j) += c * deriv_tmp + + ! --- + + power_A(3) = power_A(3) - 2 + if(power_A(3) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_m2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_m2_2, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(3) = power_A(3) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_p2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_p2_2, overlap, dim1 ) + + power_A(3) = power_A(3) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_1 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_y0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_2 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_y0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_z(i,j) += c * deriv_tmp + + ! --- + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, ao_kinetic_integrals_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Kinetic energy integrals in the cosgtos |AO| basis. + ! + ! $\langle \chi_i |\hat{T}| \chi_j \rangle$ + ! + END_DOC + + implicit none + integer :: i, j + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i, j) & + !$OMP SHARED(ao_num, ao_kinetic_integrals_cosgtos, ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z) + do j = 1, ao_num + do i = 1, ao_num + ao_kinetic_integrals_cosgtos(i,j) = -0.5d0 * ( ao_deriv2_cosgtos_x(i,j) & + + ao_deriv2_cosgtos_y(i,j) & + + ao_deriv2_cosgtos_z(i,j) ) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f new file mode 100644 index 00000000..527a98d5 --- /dev/null +++ b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f @@ -0,0 +1,1584 @@ + +! --- + +double precision function ao_two_e_integral_cosgtos(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision :: ao_two_e_integral_cosgtos_schwartz_accel + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then + + !print *, ' with shwartz acc ' + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + else + !print *, ' without shwartz acc ' + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + ao_two_e_integral_cosgtos = 0.d0 + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + !print *, ' not the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + !integer :: ii + !do ii = 1, 3 + ! print *, 'fact_p1', fact_p1 + ! print *, 'fact_p2', fact_p2 + ! print *, 'fact_p3', fact_p3 + ! print *, 'fact_p4', fact_p4 + ! !print *, pp1, p1_inv + ! !print *, pp2, p2_inv + ! !print *, pp3, p3_inv + ! !print *, pp4, p4_inv + !enddo + ! if( abs(aimag(P1_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_1 is complex !!' + ! print *, P1_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P2_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_2 is complex !!' + ! print *, P2_center + ! print *, ' old expos:' + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! print *, ' new expo:' + ! print *, pp2, p2_inv + ! print *, ' factor:' + ! print *, fact_p2 + ! print *, ' old centers:' + ! print *, I_center, J_center + ! print *, ' powers:' + ! print *, I_power, J_power + ! stop + ! endif + ! if( abs(aimag(P3_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_3 is complex !!' + ! print *, P3_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P4_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_4 is complex !!' + ! print *, P4_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + !enddo + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + !do ii = 1, 3 + ! !print *, qq1, q1_inv + ! !print *, qq2, q2_inv + ! print *, 'fact_q1', fact_q1 + ! print *, 'fact_q2', fact_q2 + !enddo + ! if( abs(aimag(Q1_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_1 is complex !!' + ! print *, Q1_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + ! if( abs(aimag(Q2_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_2 is complex !!' + ! print *, Q2_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + !enddo + + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + !integral_tot = integral1 + !print*, integral_tot + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + !print *, ' the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + endif + +end function ao_two_e_integral_cosgtos + +! --- + +double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision, allocatable :: schwartz_kl(:,:) + double precision :: thr + double precision :: schwartz_ij + + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + + thr = ao_integrals_threshold*ao_integrals_threshold + + allocate( schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)) ) + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, K_power, L_power, K_center, L_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, K_power, L_power, K_center, L_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + + deallocate(schwartz_kl) + +end function ao_two_e_integral_cosgtos_schwartz_accel + +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] + + BEGIN_DOC + ! Needed to compute Schwartz inequalities + END_DOC + + implicit none + integer :: i, k + double precision :: ao_two_e_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP SCHEDULE(dynamic) + do i = 1, ao_num + do k = 1, i + ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +complex*16 function general_primitive_integral_cosgtos( dim, P_new, P_center, fact_p, p, p_inv, iorder_p & + , Q_new, Q_center, fact_q, q, q_inv, iorder_q ) + + BEGIN_DOC + ! + ! Computes the integral where p,q,r,s are cos-cGTOS primitives + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: iorder_p(3), iorder_q(3) + complex*16, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv + complex*16, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv + + integer :: i, j, nx, ny, nz, n_Ix, n_Iy, n_Iz, iorder, n_pt_tmp, n_pt_out + double precision :: tmp_mod + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: pq, pq_inv, pq_inv_2, p01_1, p01_2, p10_1, p10_2, ppq, sq_ppq + complex*16 :: rho, dist, const + complex*16 :: accu, tmp_p, tmp_q + complex*16 :: dx(0:max_dim), Ix_pol(0:max_dim), dy(0:max_dim), Iy_pol(0:max_dim), dz(0:max_dim), Iz_pol(0:max_dim) + complex*16 :: d1(0:max_dim), d_poly(0:max_dim) + + complex*16 :: crint_sum + + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly + + general_primitive_integral_cosgtos = (0.d0, 0.d0) + + pq = (0.5d0, 0.d0) * p_inv * q_inv + pq_inv = (0.5d0, 0.d0) / (p + q) + pq_inv_2 = pq_inv + pq_inv + p10_1 = q * pq ! 1/(2p) + p01_1 = p * pq ! 1/(2q) + p10_2 = pq_inv_2 * p10_1 * q ! 0.5d0*q/(pq + p*p) + p01_2 = pq_inv_2 * p01_1 * p ! 0.5d0*p/(q*q + pq) + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + ! --- + + iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1) + + do i = 0, iorder + Ix_pol(i) = (0.d0, 0.d0) + enddo + + n_Ix = 0 + do i = 0, iorder_p(1) + + tmp_p = P_new(i,1) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(1) + + tmp_q = tmp_p * Q_new(j,1) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(1), Q_center(1), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dx, nx, tmp_q, Ix_pol, n_Ix) + enddo + enddo + if(n_Ix == -1) then + return + endif + + ! --- + + iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2) + + do i = 0, iorder + Iy_pol(i) = (0.d0, 0.d0) + enddo + + n_Iy = 0 + do i = 0, iorder_p(2) + + tmp_p = P_new(i,2) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(2) + + tmp_q = tmp_p * Q_new(j,2) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(2), Q_center(2), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dy, ny, tmp_q, Iy_pol, n_Iy) + enddo + enddo + + if(n_Iy == -1) then + return + endif + + ! --- + + iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3) + + do i = 0, iorder + Iz_pol(i) = (0.d0, 0.d0) + enddo + + n_Iz = 0 + do i = 0, iorder_p(3) + + tmp_p = P_new(i,3) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(3) + + tmp_q = tmp_p * Q_new(j,3) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(3), Q_center(3), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dz, nz, tmp_q, Iz_pol, n_Iz) + enddo + enddo + + if(n_Iz == -1) then + return + endif + + ! --- + + rho = p * q * pq_inv_2 + dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) & + + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) & + + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3)) + const = dist * rho + + n_pt_tmp = n_Ix + n_Iy + do i = 0, n_pt_tmp + d_poly(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp) + if(n_pt_tmp == -1) then + return + endif + n_pt_out = n_pt_tmp + n_Iz + do i = 0, n_pt_out + d1(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out) + + accu = crint_sum(n_pt_out, const, d1) +! print *, n_pt_out, real(d1(0:n_pt_out)) +! print *, real(accu) + + general_primitive_integral_cosgtos = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / sq_ppq + +end function general_primitive_integral_cosgtos + +! --- + +complex*16 function ERI_cosgtos(alpha, beta, delta, gama, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z) + + BEGIN_DOC + ! ATOMIC PRIMTIVE two-electron integral between the 4 primitives :: + ! primitive_1 = x1**(a_x) y1**(a_y) z1**(a_z) exp(-alpha * r1**2) + ! primitive_2 = x1**(b_x) y1**(b_y) z1**(b_z) exp(- beta * r1**2) + ! primitive_3 = x2**(c_x) y2**(c_y) z2**(c_z) exp(-delta * r2**2) + ! primitive_4 = x2**(d_x) y2**(d_y) z2**(d_z) exp(- gama * r2**2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(in) :: delta, gama, alpha, beta + + integer :: a_x_2, b_x_2, c_x_2, d_x_2, a_y_2, b_y_2, c_y_2, d_y_2, a_z_2, b_z_2, c_z_2, d_z_2 + integer :: i, j, k, l, n_pt + integer :: nx, ny, nz + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: p, q, ppq, sq_ppq, coeff, I_f + + ERI_cosgtos = (0.d0, 0.d0) + + ASSERT (REAL(alpha) >= 0.d0) + ASSERT (REAL(beta ) >= 0.d0) + ASSERT (REAL(delta) >= 0.d0) + ASSERT (REAL(gama ) >= 0.d0) + + nx = a_x + b_x + c_x + d_x + if(iand(nx,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + ny = a_y + b_y + c_y + d_y + if(iand(ny,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + nz = a_z + b_z + c_z + d_z + if(iand(nz,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + n_pt = shiftl(nx+ny+nz, 1) + + p = alpha + beta + q = delta + gama + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + coeff = pi_5_2 / (p * q * sq_ppq) + if(n_pt == 0) then + ERI_cosgtos = coeff + return + endif + + call integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + ERI_cosgtos = I_f * coeff + +end function ERI_cosgtos + +! --- + +subroutine integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + BEGIN_DOC + ! Calculates the integral of the polynomial : + ! + ! $I_{x_1}(a_x+b_x, c_x+d_x, p, q) \, I_{x_1}(a_y+b_y, c_y+d_y, p, q) \, I_{x_1}(a_z+b_z, c_z+d_z, p, q)$ + ! in $( 0 ; 1)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(out) :: I_f + + integer :: i, j, ix, iy, iz, jx, jy, jz, sx, sy, sz + complex*16 :: p, q + complex*16 :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2 + complex*16 :: B00(n_pt_max_integrals), B10(n_pt_max_integrals), B01(n_pt_max_integrals) + complex*16 :: t1(n_pt_max_integrals), t2(n_pt_max_integrals) + + + ASSERT (n_pt > 1) + + j = shiftr(n_pt, 1) + + pq_inv = (0.5d0, 0.d0) / (p + q) + p10_1 = (0.5d0, 0.d0) / p + p01_1 = (0.5d0, 0.d0) / q + p10_2 = (0.5d0, 0.d0) * q /(p * q + p * p) + p01_2 = (0.5d0, 0.d0) * p /(q * q + q * p) + pq_inv_2 = pq_inv + pq_inv + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: t1, t2, B10, B01, B00 + ix = a_x + b_x + jx = c_x + d_x + iy = a_y + b_y + jy = c_y + d_y + iz = a_z + b_z + jz = c_z + d_z + sx = ix + jx + sy = iy + jy + sz = iz + jz + + do i = 1, n_pt + B10(i) = p10_1 - gauleg_t2(i, j) * p10_2 + B01(i) = p01_1 - gauleg_t2(i, j) * p01_2 + B00(i) = gauleg_t2(i, j) * pq_inv + enddo + + if(sx > 0) then + call I_x1_new_cosgtos(ix, jx, B10, B01, B00, t1, n_pt) + else + do i = 1, n_pt + t1(i) = (1.d0, 0.d0) + enddo + endif + + if(sy > 0) then + call I_x1_new_cosgtos(iy, jy, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + if(sz > 0) then + call I_x1_new_cosgtos(iz, jz, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + I_f = (0.d0, 0.d0) + do i = 1, n_pt + I_f += gauleg_w(i, j) * t1(i) + enddo + +end subroutine integrale_new_cosgtos + +! --- + +recursive subroutine I_x1_new_cosgtos(a, c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + complex*16 :: res2(n_pt_max_integrals) + + if(c < 0) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + else if (a == 0) then + + call I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + else if (a == 1) then + + call I_x2_new_cosgtos(c-1, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c) * B_00(i) * res(i) + enddo + + else + + call I_x1_new_cosgtos(a-2, c , B_10, B_01, B_00, res , n_pt) + call I_x1_new_cosgtos(a-1, c-1, B_10, B_01, B_00, res2, n_pt) + do i = 1, n_pt + res(i) = dble(a-1) * B_10(i) * res(i) + dble(c) * B_00(i) * res2(i) + enddo + + endif + +end subroutine I_x1_new_cosgtos + +! --- + +recursive subroutine I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + + if(c == 1) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + elseif(c == 0) then + + do i = 1, n_pt + res(i) = (1.d0, 0.d0) + enddo + + else + + call I_x1_new_cosgtos(0, c-2, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c-1) * B_01(i) * res(i) + enddo + + endif + +end subroutine I_x2_new_cosgtos + +! --- + +subroutine give_cpolynom_mult_center_x( P_center, Q_center, a_x, d_x, p, q, n_pt_in & + , pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, d, n_pt_out) + + BEGIN_DOC + ! subroutine that returns the explicit polynom in term of the "t" + ! variable of the following polynoms : + ! + ! $I_{x_1}(a_x,d_x,p,q) \, I_{x_1}(a_y,d_y,p,q) \ I_{x_1}(a_z,d_z,p,q)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a_x, d_x + complex*16, intent(in) :: P_center, Q_center, p, q, pq_inv, p10_1, p01_1, p10_2, p01_2, pq_inv_2 + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:max_dim) + + integer :: n_pt1, i + complex*16 :: B10(0:2), B01(0:2), B00(0:2), C00(0:2), D00(0:2) + + ASSERT (n_pt_in >= 0) + + B10(0) = p10_1 + B10(1) = (0.d0, 0.d0) + B10(2) = -p10_2 + + B01(0) = p01_1 + B01(1) = (0.d0, 0.d0) + B01(2) = -p01_2 + + B00(0) = (0.d0, 0.d0) + B00(1) = (0.d0, 0.d0) + B00(2) = pq_inv + + C00(0) = (0.d0, 0.d0) + C00(1) = (0.d0, 0.d0) + C00(2) = -q * (P_center - Q_center) * pq_inv_2 + + D00(0) = (0.d0, 0.d0) + D00(1) = (0.d0, 0.d0) + D00(2) = -p * (Q_center - P_center) * pq_inv_2 + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt1 = n_pt_in + + !DIR$ FORCEINLINE + call I_x1_pol_mult_cosgtos(a_x, d_x, B10, B01, B00, C00, D00, d, n_pt1, n_pt_in) + n_pt_out = n_pt1 + +! print *, ' ' +! print *, a_x, d_x +! print *, real(B10), real(B01), real(B00), real(C00), real(D00) +! print *, n_pt1, real(d(0:n_pt1)) +! print *, ' ' + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + +end subroutine give_cpolynom_mult_center_x + +! --- + +subroutine I_x1_pol_mult_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + if( (c >= 0) .and. (nd >= 0) ) then + + if(a == 1) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a == 2) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a > 2) then + call I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else ! a == 0 + + if(c == 0)then + nd = 0 + d(0) = (1.d0, 0.d0) + return + endif + + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + endif + + else + + nd = -1 + + endif + +end subroutine I_x1_pol_mult_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + ASSERT (a > 2) + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + if(a == 3) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + elseif(a == 4) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT (a >= 5) + call I_x1_pol_mult_recurs_cosgtos(a-2, c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + nx = nd + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + if(c > 0) then + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + endif + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + + ASSERT (a > 2) + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_recurs_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a1_cosgtos(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + if( (c < 0) .or. (nd < 0) ) then + nd = -1 + return + endif + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a1_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if (c>1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = 0.d0 + enddo + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a2_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, dim) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: i + integer :: nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + select case (c) + + case (0) + nd = 0 + d(0) = (1.d0, 0.d0) + return + + case (:-1) + nd = -1 + return + + case (1) + nd = 2 + d(0) = D_00(0) + d(1) = D_00(1) + d(2) = D_00(2) + return + + case (2) + nd = 2 + d(0) = B_01(0) + d(1) = B_01(1) + d(2) = B_01(2) + + ny = 2 + Y(0) = D_00(0) + Y(1) = D_00(1) + Y(2) = D_00(2) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + return + + case default + + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + X(ix) = (0.d0, 0.d0) + enddo + nx = 0 + call I_x2_pol_mult_cosgtos(c-2, B_10, B_01, B_00, C_00, D_00, X, nx, dim) + + !DIR$ LOOP COUNT(6) + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_01, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + Y(ix) = 0.d0 + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, Y, ny, dim) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + + end select + +end subroutine I_x2_pol_mult_cosgtos + +! --- + + diff --git a/src/utils/cgtos_one_e.irp.f b/src/utils/cgtos_one_e.irp.f new file mode 100644 index 00000000..43ca8224 --- /dev/null +++ b/src/utils/cgtos_one_e.irp.f @@ -0,0 +1,120 @@ + +! --- + +complex*16 function overlap_cgaussian_x(A_center, B_center, alpha, beta, power_A, power_B, dim) + + BEGIN_DOC + ! + ! \int_{-infty}^{+infty} (x-A_x)^ax (x-B_x)^bx exp(-alpha (x-A_x)^2) exp(- beta(x-B_X)^2) dx + ! with complex arguments + ! + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A, power_B + complex*16, intent(in) :: A_center, B_center, alpha, beta + + integer :: i, iorder_p + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim), P_center, fact_p, p, inv_sq_p + + complex*16 :: Fc_integral + + + call give_explicit_cpoly_and_cgaussian_x( P_new, P_center, p, fact_p, iorder_p & + , alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 1.d-14) then + overlap_cgaussian_x = (0.d0, 0.d0) + return + endif + + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + + overlap_cgaussian_x = (0.d0, 0.d0) + do i = 0, iorder_p + overlap_cgaussian_x += P_new(i) * Fc_integral(i, inv_sq_p) + enddo + + overlap_cgaussian_x *= fact_p + +end function overlap_cgaussian_x + +! --- + +subroutine overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_z, overlap, dim ) + + BEGIN_DOC + ! + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx + ! S = S_x S_y S_z + ! for complex arguments + ! + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A(3), power_B(3) + complex*16, intent(in) :: A_center(3), B_center(3), alpha, beta + complex*16, intent(out) :: overlap_x, overlap_y, overlap_z, overlap + + integer :: i, nmax, iorder_p(3) + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim,3), P_center(3), fact_p, p, inv_sq_p + complex*16 :: F_integral_tab(0:max_dim) + + complex*16 :: Fc_integral + + call give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 1.d-14) then + overlap_x = (1.d-10, 0.d0) + overlap_y = (1.d-10, 0.d0) + overlap_z = (1.d-10, 0.d0) + overlap = (1.d-10, 0.d0) + return + endif + + nmax = maxval(iorder_p) + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + do i = 0, nmax + F_integral_tab(i) = Fc_integral(i, inv_sq_p) + enddo + + overlap_x = P_new(0,1) * F_integral_tab(0) + overlap_y = P_new(0,2) * F_integral_tab(0) + overlap_z = P_new(0,3) * F_integral_tab(0) + + do i = 1, iorder_p(1) + overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(1), beta, B_center(1), fact_p, p, P_center(1)) + overlap_x *= fact_p + + do i = 1, iorder_p(2) + overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(2), beta, B_center(2), fact_p, p, P_center(2)) + overlap_y *= fact_p + + do i = 1, iorder_p(3) + overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(3), beta, B_center(3), fact_p, p, P_center(3)) + overlap_z *= fact_p + + overlap = overlap_x * overlap_y * overlap_z + +end subroutine overlap_cgaussian_xyz + +! --- + + diff --git a/src/utils/cgtos_utils.irp.f b/src/utils/cgtos_utils.irp.f new file mode 100644 index 00000000..a820d5f2 --- /dev/null +++ b/src/utils/cgtos_utils.irp.f @@ -0,0 +1,780 @@ + +! --- + +subroutine give_explicit_cpoly_and_cgaussian_x(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transform the product of + ! (x-x_A)^a (x-x_B)^b exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k \sum_{i=0}^{iorder} (x-x_P)^i exp(-p(r-P)^2) + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: a, b + complex*16, intent(in) :: alpha, beta, A_center, B_center + integer, intent(out) :: iorder + complex*16, intent(out) :: p, P_center, fact_k + complex*16, intent(out) :: P_new(0:max_dim) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim), P_b(0:max_dim) + complex*16 :: p_inv, ab, d_AB, tmp + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + P_new = (0.d0, 0.d0) + + ! new exponent + p = alpha + beta + + ! new center + p_inv = (1.d0, 0.d0) / p + ab = alpha * beta + P_center = (alpha * A_center + beta * B_center) * p_inv + + ! get the factor + d_AB = (A_center - B_center) * (A_center - B_center) + tmp = ab * p_inv * d_AB + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 50.d0) then + fact_k = zexp(-tmp) + else + fact_k = (0.d0, 0.d0) + endif + + ! Recenter the polynomials P_a and P_b on P_center + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0), A_center, P_center, a, P_b(0), B_center, P_center, b) + n_new = 0 + + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0), a, P_b(0), b, P_new(0), n_new) + iorder = a + b + +end subroutine give_explicit_cpoly_and_cgaussian_x + +! --- + +subroutine give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING ::: IF fact_k is too smal then: + ! returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, a(3), b(3) + complex*16, intent(in) :: alpha, beta, A_center(3), B_center(3) + integer, intent(out) :: iorder(3) + complex*16, intent(out) :: p, P_center(3), fact_k, P_new(0:max_dim,3) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim,3), P_b(0:max_dim,3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + iorder(1) = 0 + iorder(2) = 0 + iorder(3) = 0 + + P_new(0,1) = (0.d0, 0.d0) + P_new(0,2) = (0.d0, 0.d0) + P_new(0,3) = (0.d0, 0.d0) + + !DIR$ FORCEINLINE + call cgaussian_product(alpha, A_center, beta, B_center, fact_k, p, P_center) + + ! IF fact_k is too smal then: returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + tmp_mod = dsqrt(REAL(fact_k)*REAL(fact_k) + AIMAG(fact_k)*AIMAG(fact_k)) + if(tmp_mod < 1d-14) then + iorder = 0 + p = (1.d+14, 0.d0) + fact_k = (0.d0 , 0.d0) + P_new(0:max_dim,1:3) = (0.d0 , 0.d0) + P_center(1:3) = (0.d0 , 0.d0) + return + endif + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,1), A_center(1), P_center(1), a(1), P_b(0,1), B_center(1), P_center(1), b(1)) + iorder(1) = a(1) + b(1) + do i = 0, iorder(1) + P_new(i,1) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,1), a(1), P_b(0,1), b(1), P_new(0,1), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,2), A_center(2), P_center(2), a(2), P_b(0,2), B_center(2), P_center(2), b(2)) + iorder(2) = a(2) + b(2) + do i = 0, iorder(2) + P_new(i,2) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,2), a(2), P_b(0,2), b(2), P_new(0,2), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,3), A_center(3), P_center(3), a(3), P_b(0,3), B_center(3), P_center(3), b(3)) + iorder(3) = a(3) + b(3) + do i = 0, iorder(3) + P_new(i,3) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,3), a(3), P_b(0,3), b(3), P_new(0,3), n_new) + +end subroutine give_explicit_cpoly_and_cgaussian + +! --- + +!subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) +! BEGIN_DOC +! ! Transforms the product of +! ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) +! ! exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) exp(-(r-Nucl_center)^2 gama +! ! +! ! into +! ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) +! ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) +! ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) +! END_DOC +! implicit none +! include 'constants.include.F' +! integer, intent(in) :: dim +! integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) +! double precision, intent(in) :: alpha, beta, gama ! exponents +! double precision, intent(in) :: A_center(3) ! A center +! double precision, intent(in) :: B_center (3) ! B center +! double precision, intent(in) :: Nucl_center(3) ! B center +! double precision, intent(out) :: P_center(3) ! new center +! double precision, intent(out) :: p ! new exponent +! double precision, intent(out) :: fact_k ! constant factor +! double precision, intent(out) :: P_new(0:max_dim,3)! polynomial +! integer , intent(out) :: iorder(3) ! i_order(i) = order of the polynomials +! +! double precision :: P_center_tmp(3) ! new center +! double precision :: p_tmp ! new exponent +! double precision :: fact_k_tmp,fact_k_bis ! constant factor +! double precision :: P_new_tmp(0:max_dim,3)! polynomial +! integer :: i,j +! double precision :: binom_func +! +! ! First you transform the two primitives into a sum of primitive with the same center P_center_tmp and gaussian exponent p_tmp +! call give_explicit_cpoly_and_cgaussian(P_new_tmp,P_center_tmp,p_tmp,fact_k_tmp,iorder,alpha,beta,a,b,A_center,B_center,dim) +! ! Then you create the new gaussian from the product of the new one per the Nuclei one +! call cgaussian_product(p_tmp,P_center_tmp,gama,Nucl_center,fact_k_bis,p,P_center) +! fact_k = fact_k_bis * fact_k_tmp +! +! ! Then you build the coefficient of the new polynom +! do i = 0, iorder(1) +! P_new(i,1) = 0.d0 +! do j = i,iorder(1) +! P_new(i,1) = P_new(i,1) + P_new_tmp(j,1) * binom_func(j,j-i) * (P_center(1) - P_center_tmp(1))**(j-i) +! enddo +! enddo +! do i = 0, iorder(2) +! P_new(i,2) = 0.d0 +! do j = i,iorder(2) +! P_new(i,2) = P_new(i,2) + P_new_tmp(j,2) * binom_func(j,j-i) * (P_center(2) - P_center_tmp(2))**(j-i) +! enddo +! enddo +! do i = 0, iorder(3) +! P_new(i,3) = 0.d0 +! do j = i,iorder(3) +! P_new(i,3) = P_new(i,3) + P_new_tmp(j,3) * binom_func(j,j-i) * (P_center(3) - P_center_tmp(3))**(j-i) +! enddo +! enddo +! +!end + +! --- + +subroutine cgaussian_product(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product + ! e^{-a (r-r_A)^2} e^{-b (r-r_B)^2} = k e^{-p (r-r_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa(3), xb(3) + complex*16, intent(out) :: p, k, xp(3) + + double precision :: tmp_mod + complex*16 :: p_inv, xab(3), ab + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new exponent + p = a + b + + xab(1) = xa(1) - xb(1) + xab(2) = xa(2) - xb(2) + xab(3) = xa(3) - xb(3) + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * (xab(1)*xab(1) + xab(2)*xab(2) + xab(3)*xab(3)) + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod .gt. 40.d0) then + k = (0.d0, 0.d0) + xp(1:3) = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp(1) = ( a * xa(1) + b * xb(1) ) * p_inv + xp(2) = ( a * xa(2) + b * xb(2) ) * p_inv + xp(3) = ( a * xa(3) + b * xb(3) ) * p_inv + +end subroutine cgaussian_product + +! --- + +subroutine cgaussian_product_x(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K e^{-p (x-x_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa, xb + complex*16, intent(out) :: p, k, xp + + double precision :: tmp_mod + complex*16 :: p_inv + complex*16 :: xab, ab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new center + p = a + b + + xab = xa - xb + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * xab*xab + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod > 40.d0) then + k = (0.d0, 0.d0) + xp = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp = (a*xa + b*xb) * p_inv + +end subroutine cgaussian_product_x + +! --- + +subroutine multiply_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Multiply two complex polynomials + ! D(t) += B(t) * C(t) + END_DOC + + implicit none + + integer, intent(in) :: nb, nc + complex*16, intent(in) :: b(0:nb), c(0:nc) + complex*16, intent(inout) :: d(0:nb+nc) + integer, intent(out) :: nd + + integer :: ndtmp, ib, ic + double precision :: tmp_mod + complex*16 :: tmp + + if(ior(nc, nb) >= 0) then ! True if nc>=0 and nb>=0 + continue + else + return + endif + + ndtmp = nb + nc + + do ic = 0, nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do ib = 1, nb + d(ib) = d(ib) + c(0) * b(ib) + do ic = 1, nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = ndtmp, 0, -1 + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 1.d-15) cycle + exit + enddo + +end subroutine multiply_cpoly + +! --- + +subroutine add_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Add two complex polynomials + ! D(t) += B(t) + C(t) + END_DOC + + implicit none + complex*16, intent(in) :: b(0:nb), c(0:nc) + integer, intent(inout) :: nb, nc + integer, intent(out) :: nd + complex*16, intent(out) :: d(0:nb+nc) + + integer :: ib + double precision :: tmp_mod + complex*16 :: tmp + + nd = nb + nc + do ib = 0, max(nb, nc) + d(ib) = d(ib) + c(ib) + b(ib) + enddo + + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + do while( (tmp_mod .lt. 1.d-15) .and. (nd >= 0) ) + nd -= 1 + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(nd < 0) exit + enddo + +end subroutine add_cpoly + +! --- + +subroutine add_cpoly_multiply(b, nb, cst, d, nd) + + BEGIN_DOC + ! Add a complex polynomial multiplied by a complex constant + ! D(t) += cst * B(t) + END_DOC + + implicit none + + integer, intent(in) :: nb + complex*16, intent(in) :: b(0:nb), cst + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max(nb, nd)) + + integer :: ib + double precision :: tmp_mod + complex*16 :: tmp + + nd = max(nd, nb) + if(nd /= -1) then + + do ib = 0, nb + d(ib) = d(ib) + cst * b(ib) + enddo + + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + do while(tmp_mod .lt. 1.d-15) + nd -= 1 + if(nd < 0) exit + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + enddo + + endif + +end subroutine add_cpoly_multiply + +! --- + +subroutine recentered_cpoly2(P_A, x_A, x_P, a, P_B, x_B, x_Q, b) + + BEGIN_DOC + ! + ! write two complex polynomials (x-x_A)^a (x-x_B)^b + ! as P_A(x-x_P) and P_B(x-x_Q) + ! + END_DOC + + implicit none + + integer, intent(in) :: a, b + complex*16, intent(in) :: x_A, x_P, x_B, x_Q + complex*16, intent(out) :: P_A(0:a), P_B(0:b) + + integer :: i, minab, maxab + complex*16 :: pows_a(-2:a+b+4), pows_b(-2:a+b+4) + + double precision :: binom_func + + if((a<0) .or. (b<0)) return + + maxab = max(a, b) + minab = max(min(a, b), 0) + + pows_a(0) = (1.d0, 0.d0) + pows_a(1) = x_P - x_A + + pows_b(0) = (1.d0, 0.d0) + pows_b(1) = x_Q - x_B + + do i = 2, maxab + pows_a(i) = pows_a(i-1) * pows_a(1) + pows_b(i) = pows_b(i-1) * pows_b(1) + enddo + + P_A(0) = pows_a(a) + P_B(0) = pows_b(b) + + do i = 1, min(minab, 20) + P_A(i) = binom_transp(a-i,a) * pows_a(a-i) + P_B(i) = binom_transp(b-i,b) * pows_b(b-i) + enddo + + do i = minab+1, min(a, 20) + P_A(i) = binom_transp(a-i,a) * pows_a(a-i) + enddo + do i = minab+1, min(b, 20) + P_B(i) = binom_transp(b-i,b) * pows_b(b-i) + enddo + + do i = 101, a + P_A(i) = binom_func(a,a-i) * pows_a(a-i) + enddo + do i = 101, b + P_B(i) = binom_func(b,b-i) * pows_b(b-i) + enddo + +end subroutine recentered_cpoly2 + +! --- + +complex*16 function Fc_integral(n, inv_sq_p) + + BEGIN_DOC + ! function that calculates the following integral + ! \int_{\-infty}^{+\infty} x^n \exp(-p x^2) dx + ! for complex valued p + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n + complex*16, intent(in) :: inv_sq_p + + ! (n)! + double precision :: fact + + if(n < 0) then + Fc_integral = (0.d0, 0.d0) + return + endif + + ! odd n + if(iand(n, 1) .ne. 0) then + Fc_integral = (0.d0, 0.d0) + return + endif + + if(n == 0) then + Fc_integral = sqpi * inv_sq_p + return + endif + + Fc_integral = sqpi * 0.5d0**n * inv_sq_p**dble(n+1) * fact(n) / fact(shiftr(n, 1)) + +end function Fc_integral + +! --- + +complex*16 function crint(n, rho) + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n + complex*16, intent(in) :: rho + + integer :: i, mmax + double precision :: rho_mod, rho_re, rho_im + double precision :: sq_rho_re, sq_rho_im + double precision :: n_tmp + complex*16 :: sq_rho, rho_inv, rho_exp + + complex*16 :: crint_smallz, cpx_erf + + rho_re = REAL (rho) + rho_im = AIMAG(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + if(rho_mod < 10.d0) then + ! small z + + if(rho_mod .lt. 1.d-10) then + crint = 1.d0 / dble(n + n + 1) + else + crint = crint_smallz(n, rho) + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + + n_tmp = dble(n) + 0.5d0 + crint = 0.5d0 * gamma(n_tmp) / (rho**n_tmp) + + else + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + crint = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + mmax = n + if(mmax .gt. 0) then + do i = 0, mmax-1 + crint = ((dble(i) + 0.5d0) * crint - rho_exp) * rho_inv + enddo + endif + + ! *** + + endif + + endif + +! print *, n, real(rho), real(crint) + +end function crint + +! --- + +complex*16 function crint_sum(n_pt_out, rho, d1) + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n_pt_out + complex*16, intent(in) :: rho, d1(0:n_pt_out) + + integer :: n, i, mmax + double precision :: rho_mod, rho_re, rho_im + double precision :: sq_rho_re, sq_rho_im + complex*16 :: sq_rho, F0 + complex*16 :: rho_tmp, rho_inv, rho_exp + complex*16, allocatable :: Fm(:) + + complex*16 :: crint_smallz, cpx_erf + + rho_re = REAL (rho) + rho_im = AIMAG(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + if(rho_mod < 10.d0) then + ! small z + + if(rho_mod .lt. 1.d-10) then + +! print *, ' 111' +! print *, ' rho = ', rho + + crint_sum = d1(0) +! print *, 0, 1 + + do i = 2, n_pt_out, 2 + + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) / dble(n+n+1) + +! print *, n, 1.d0 / dble(n+n+1) + enddo + + ! *** + + else + +! print *, ' 222' +! print *, ' rho = ', real(rho) +! if(abs(aimag(rho)) .gt. 1d-15) then +! print *, ' complex rho', rho +! stop +! endif + + crint_sum = d1(0) * crint_smallz(0, rho) + +! print *, 0, real(d1(0)), real(crint_smallz(0, rho)) +! if(abs(aimag(d1(0))) .gt. 1d-15) then +! print *, ' complex d1(0)', d1(0) +! stop +! endif + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) * crint_smallz(n, rho) + +! print *, n, real(d1(i)), real(crint_smallz(n, rho)) +! if(abs(aimag(d1(i))) .gt. 1d-15) then +! print *, ' complex d1(i)', i, d1(i) +! stop +! endif + + enddo + +! print *, 'sum = ', real(crint_sum) +! if(abs(aimag(crint_sum)) .gt. 1d-15) then +! print *, ' complex crint_sum', crint_sum +! stop +! endif + + ! *** + + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + +! print *, ' 333' +! print *, ' rho = ', rho + + rho_inv = (1.d0, 0.d0) / rho + rho_tmp = 0.5d0 * sqpi * zsqrt(rho_inv) + crint_sum = rho_tmp * d1(0) +! print *, 0, rho_tmp + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + rho_tmp = rho_tmp * (dble(n) + 0.5d0) * rho_inv + crint_sum = crint_sum + rho_tmp * d1(i) +! print *, n, rho_tmp + enddo + + ! *** + + else + +! print *, ' 444' +! print *, ' rho = ', rho + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + !sq_rho = zsqrt(rho) + + + F0 = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + crint_sum = F0 * d1(0) +! print *, 0, F0 + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + mmax = shiftr(n_pt_out, 1) + if(mmax .gt. 0) then + + allocate( Fm(mmax) ) + Fm(1:mmax) = (0.d0, 0.d0) + + do n = 0, mmax-1 + F0 = ((dble(n) + 0.5d0) * F0 - rho_exp) * rho_inv + Fm(n+1) = F0 +! print *, n, F0 + enddo + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + Fm(n) * d1(i) + enddo + deallocate(Fm) + endif + + ! *** + + endif + + endif + +end function crint_sum + +! --- + +complex*16 function crint_smallz(n, rho) + + BEGIN_DOC + ! Standard version of rint + END_DOC + + implicit none + integer, intent(in) :: n + complex*16, intent(in) :: rho + + integer, parameter :: kmax = 40 + double precision, parameter :: eps = 1.d-13 + + integer :: k + double precision :: delta_mod + complex*16 :: rho_k, ct, delta_k + + ct = 0.5d0 * zexp(-rho) * gamma(dble(n) + 0.5d0) + rho_k = (1.d0, 0.d0) + crint_smallz = ct * rho_k / gamma(dble(n) + 1.5d0) + + do k = 1, kmax + + rho_k = rho_k * rho + delta_k = ct * rho_k / gamma(dble(n+k) + 1.5d0) + crint_smallz = crint_smallz + delta_k + + delta_mod = dsqrt(REAL(delta_k)*REAL(delta_k) + AIMAG(delta_k)*AIMAG(delta_k)) + if(delta_mod .lt. eps) return + enddo + + if(delta_mod > eps) then + write(*,*) ' pb in crint_smallz !' + write(*,*) ' n, rho = ', n, rho + write(*,*) ' delta_mod = ', delta_mod + stop 1 + endif + +end function crint_smallz + +! --- + diff --git a/src/utils/cpx_erf.irp.f b/src/utils/cpx_erf.irp.f new file mode 100644 index 00000000..61f81055 --- /dev/null +++ b/src/utils/cpx_erf.irp.f @@ -0,0 +1,204 @@ + +! --- + +complex*16 function cpx_erf(x, y) + + BEGIN_DOC + ! + ! compute erf(z) for z = x + i y + ! + ! REF: Abramowitz and Stegun + ! + END_DOC + + implicit none + + double precision, intent(in) :: x, y + + double precision :: yabs + complex*16 :: erf_tmp1, erf_tmp2, erf_tmp3, erf_tot + + double precision :: erf_F + complex*16 :: erf_E, erf_G, erf_H + + yabs = dabs(y) + + if(yabs .lt. 1.d-15) then + + cpx_erf = (1.d0, 0.d0) * derf(x) + return + + else + + erf_tmp1 = (1.d0, 0.d0) * derf(x) + erf_tmp2 = erf_E(x, yabs) + erf_F(x, yabs) + erf_tmp3 = zexp(-(0.d0, 2.d0) * x * yabs) * ( erf_G(x, yabs) + erf_H(x, yabs) ) + erf_tot = erf_tmp1 + erf_tmp2 - erf_tmp3 + + endif + + if(y .gt. 0.d0) then + cpx_erf = erf_tot + else + cpx_erf = CONJG(erf_tot) + endif + +end function cpx_erf + +! --- + +complex*16 function erf_E(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + if( (dabs(x).gt.6.d0) .or. (x==0.d0) ) then + erf_E = (0.d0, 0.d0) + return + endif + + if(dabs(x) .lt. 1.d-7) then + + erf_E = -inv_pi * (0.d0, 1.d0) * yabs + + else + + erf_E = 0.5d0 * inv_pi * dexp(-x*x) & + * ((1.d0, 0.d0) - zexp(-(2.d0, 0.d0) * x * yabs)) / x + + endif + +end function erf_E + +! --- + +double precision function erf_F(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp1, tmp2, x2, ct + + + if(dabs(x) .gt. 5.8d0) then + + erf_F = 0.d0 + + else + + x2 = x * x + ct = x * inv_pi + + erf_F = 0.d0 + do i = 1, Nmax + + tmp1 = 0.25d0 * dble(i) * dble(i) + x2 + tmp2 = dexp(-tmp1) / tmp1 + erf_F = erf_F + tmp2 + + if(dabs(tmp2) .lt. 1d-15) exit + enddo + erf_F = ct * erf_F + + endif + +end function erf_F + +! --- + +complex*16 function erf_G(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i, tmpi, imin, imax + double precision :: tmp0, tmp1, x2, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_G = (0.d0, 0.d0) + return + endif + + tmpi = int(2.d0 * yabs) + imin = max(1, tmpi-Nmax) + imax = tmpi + Nmax + + x2 = x * x + + erf_G = 0.d0 + do i = imin, imax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp( idble * yabs - tmp1 - dlog(tmp1) - dlog_2pi) * (x - (0.d0, 1.d0)*tmp0) + + erf_G = erf_G + tmp2 + + enddo + +end function erf_G + +! --- + +complex*16 function erf_H(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp0, tmp1, tmp_mod, x2, ct, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_H = (0.d0, 0.d0) + return + endif + + + if( (dabs(x) .lt. 10d0) .and. (yabs .lt. 6.1d0) ) then + + x2 = x * x + ct = 0.5d0 * inv_pi + + erf_H = 0.d0 + do i = 1, Nmax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp(-tmp1-idble*yabs) * (x + (0.d0, 1.d0)*tmp0) / tmp1 + erf_H = erf_H + tmp2 + + tmp_mod = dsqrt(REAL(tmp2)*REAL(tmp2) + AIMAG(tmp2)*AIMAG(tmp2)) + if(tmp_mod .lt. 1d-15) exit + enddo + erf_H = ct * erf_H + + else + + erf_H = (0.d0, 0.d0) + + endif + +end function erf_H + +! --- + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 15d79622..ff17ee4e 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -133,7 +133,7 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, io BEGIN_DOC ! Transforms the product of - ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) ! into ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) @@ -427,6 +427,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + @@ -506,8 +546,10 @@ subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) enddo enddo enddo + end + subroutine add_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -1041,3 +1083,94 @@ double precision function rint1(n,rho) write(*,*)'pb in rint1 k too large!' stop 1 end + +! --- + +double precision function V_phi(n, m) + + BEGIN_DOC + ! Computes the angular $\phi$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. + END_DOC + + implicit none + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_phi = 4.d0 * prod * Wallis(m) + +end function V_phi + +! --- + +double precision function V_theta(n, m) + + BEGIN_DOC + ! Computes the angular $\theta$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ + END_DOC + + implicit none + include 'utils/constants.include.F' + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + V_theta = 0.d0 + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_theta = (prod + prod) * Wallis(m) + +end function V_theta + +! --- + +double precision function Wallis(n) + + BEGIN_DOC + ! Wallis integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n + + integer :: p + + double precision :: fact + + if(iand(n, 1) .eq. 0) then + + Wallis = fact(shiftr(n, 1)) + Wallis = pi * fact(n) / (dble(ibset(0_8, n)) * (Wallis + Wallis) * Wallis) + + else + + p = shiftr(n, 1) + Wallis = fact(p) + Wallis = dble(ibset(0_8, p+p)) * Wallis * Wallis / fact(p+p+1) + + endif + +end function Wallis + +! --- + diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 081adee3..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -93,40 +96,47 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& end +! --- + +subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx) -subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,lower_exp_val,dx,nx) - implicit none BEGIN_DOC ! .. math :: ! ! \int_{-infty}^{+infty} (x-A_center)^(power_A) * (x-B_center)^power_B * exp(-alpha(x-A_center)^2) * exp(-beta(x-B_center)^2) dx ! END_DOC - integer :: i,j,k,l - integer,intent(in) :: power_A,power_B - double precision, intent(in) :: lower_exp_val - double precision,intent(in) :: A_center, B_center,alpha,beta - double precision, intent(out) :: overlap_x,dx - integer, intent(in) :: nx - double precision :: x_min,x_max,domain,x,factor,dist,p,p_inv,rho - double precision :: P_center - if(power_A.lt.0.or.power_B.lt.0)then + + implicit none + + integer, intent(in) :: power_A, power_B, nx + double precision, intent(in) :: lower_exp_val, A_center, B_center, alpha, beta + double precision, intent(out) :: overlap_x, dx + + integer :: i, j, k, l + double precision :: x_min, x_max, domain, x, factor, dist, p, p_inv, rho + double precision :: P_center + double precision :: tmp + + if(power_A.lt.0 .or. power_B.lt.0) then overlap_x = 0.d0 dx = 0.d0 return endif - p = alpha + beta - p_inv= 1.d0/p - rho = alpha * beta * p_inv - dist = (A_center - B_center)*(A_center - B_center) + + p = alpha + beta + p_inv = 1.d0/p + rho = alpha * beta * p_inv + dist = (A_center - B_center)*(A_center - B_center) P_center = (alpha * A_center + beta * B_center) * p_inv - if(rho*dist.gt.80.d0)then + + if(rho*dist.gt.80.d0) then overlap_x= 0.d0 return endif + factor = dexp(-rho * dist) - double precision :: tmp tmp = dsqrt(lower_exp_val/p) x_min = P_center - tmp @@ -143,7 +153,7 @@ subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x, overlap_x = factor * dx * overlap_x end - +! --- subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) @@ -173,7 +183,7 @@ subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, powe double precision :: F_integral double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) - ldp = maxval(power_A(1:3) + power_B(1:3)) + ldp = maxval( power_A(1:3) + power_B(1:3) ) allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 41e7cad6..aba99c2b 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -460,6 +460,33 @@ subroutine v2_over_x(v,x,res) end +! --- + +subroutine check_sym(A, n) + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer :: i, j + double precision :: dev_sym, norm, tmp + + dev_sym = 0.d0 + norm = 0.d0 + do i = 1, n + do j = i+1, n + tmp = A(j,i) - A(i,j) + dev_sym += tmp * tmp + norm += A(j,i) * A(j,i) + enddo + enddo + + print*, ' deviation from sym = ', dev_sym + print*, ' norm = ', norm + +end subroutine check_sym + +! --- + subroutine sum_A_At(A, N) !BEGIN_DOC From e2162d282d51271fa4db8b7237fbd57250b49354 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 15 Mar 2023 10:23:48 +0100 Subject: [PATCH 002/337] non_sym dress: comb --- src/davidson/EZFIO.cfg | 55 +- src/davidson/NEED | 1 + src/davidson/davidson_parallel.irp.f | 15 - .../diagonalization_hs2_dressed.irp.f | 18 +- .../diagonalization_nonsym_h_dressed.irp.f | 541 ++++++++++++++++++ src/davidson/overlap_states.irp.f | 40 ++ .../nonsym_diagonalize_ci.irp.f | 188 ++++++ src/davidson_keywords/EZFIO.cfg | 54 ++ src/davidson_keywords/README.rst | 5 + .../input.irp.f | 10 +- src/davidson_keywords/usef.irp.f | 33 ++ .../null_dressing_vector.irp.f | 2 + .../spindeterminants.ezfio_config | 5 +- ...uet => save_bitcpsileft_for_qmcchem.irp.f} | 0 14 files changed, 883 insertions(+), 84 deletions(-) create mode 100644 src/davidson/diagonalization_nonsym_h_dressed.irp.f create mode 100644 src/davidson/overlap_states.irp.f create mode 100644 src/davidson_dressed/nonsym_diagonalize_ci.irp.f create mode 100644 src/davidson_keywords/EZFIO.cfg create mode 100644 src/davidson_keywords/README.rst rename src/{davidson => davidson_keywords}/input.irp.f (79%) create mode 100644 src/davidson_keywords/usef.irp.f rename src/tc_bi_ortho/{save_bitcpsileft_for_qmcchem.irp.pouet => save_bitcpsileft_for_qmcchem.irp.f} (100%) diff --git a/src/davidson/EZFIO.cfg b/src/davidson/EZFIO.cfg index bfa55526..1152560f 100644 --- a/src/davidson/EZFIO.cfg +++ b/src/davidson/EZFIO.cfg @@ -1,71 +1,18 @@ -[threshold_davidson] -type: Threshold -doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false. -interface: ezfio,provider,ocaml -default: 1.e-10 - -[threshold_nonsym_davidson] -type: Threshold -doc: Thresholds of non-symetric Davidson's algorithm -interface: ezfio,provider,ocaml -default: 1.e-10 - -[threshold_davidson_from_pt2] -type: logical -doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2 -interface: ezfio,provider,ocaml -default: false - -[n_states_diag] -type: States_number -doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag -default: 4 -interface: ezfio,ocaml - -[davidson_sze_max] -type: Strictly_positive_int -doc: Number of micro-iterations before re-contracting -default: 15 -interface: ezfio,provider,ocaml - -[state_following] -type: logical -doc: If |true|, the states are re-ordered to match the input states -default: False -interface: ezfio,provider,ocaml - -[disk_based_davidson] -type: logical -doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is available -default: True -interface: ezfio,provider,ocaml - [csf_based] type: logical doc: If |true|, use the CSF-based algorithm default: False interface: ezfio,provider,ocaml -[distributed_davidson] -type: logical -doc: If |true|, use the distributed algorithm -default: True -interface: ezfio,provider,ocaml - [only_expected_s2] type: logical doc: If |true|, use filter out all vectors with bad |S^2| values default: True interface: ezfio,provider,ocaml -[n_det_max_full] -type: Det_number_max -doc: Maximum number of determinants where |H| is fully diagonalized -interface: ezfio,provider,ocaml -default: 1000 - [without_diagonal] type: logical doc: If |true|, don't use denominator default: False interface: ezfio,provider,ocaml + diff --git a/src/davidson/NEED b/src/davidson/NEED index bfe31bd0..bd0abe2f 100644 --- a/src/davidson/NEED +++ b/src/davidson/NEED @@ -1 +1,2 @@ csf +davidson_keywords diff --git a/src/davidson/davidson_parallel.irp.f b/src/davidson/davidson_parallel.irp.f index e627dfc9..4f88506a 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -546,21 +546,6 @@ end -BEGIN_PROVIDER [ integer, nthreads_davidson ] - implicit none - BEGIN_DOC - ! Number of threads for Davidson - END_DOC - nthreads_davidson = nproc - character*(32) :: env - call getenv('QP_NTHREADS_DAVIDSON',env) - if (trim(env) /= '') then - read(env,*) nthreads_davidson - call write_int(6,nthreads_davidson,'Target number of threads for ') - endif -END_PROVIDER - - integer function zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id) use f77_zmq implicit none diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..f4c05595 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -14,15 +14,6 @@ BEGIN_PROVIDER [ character*(64), diag_algorithm ] endif END_PROVIDER -BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ] - implicit none - BEGIN_DOC - ! Threshold of Davidson's algorithm, using PT2 as a guide - END_DOC - threshold_davidson_pt2 = threshold_davidson - -END_PROVIDER - BEGIN_PROVIDER [ integer, dressed_column_idx, (N_states) ] @@ -66,7 +57,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d double precision, allocatable :: H_jj(:) double precision, external :: diag_H_mat_elem, diag_S_mat_elem - integer :: i,k + integer :: i,k,l ASSERT (N_st > 0) ASSERT (sze > 0) ASSERT (Nint > 0) @@ -87,9 +78,14 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d if (dressing_state > 0) then do k=1,N_st + do i=1,sze - H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) enddo + + !l = dressed_column_idx(k) + !H_jj(l) += u_in(l,k) * dressing_column_h(l,k) + enddo endif diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f new file mode 100644 index 00000000..3ff060a6 --- /dev/null +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -0,0 +1,541 @@ + +! --- + +subroutine davidson_diag_nonsym_h(dets_in, u_in, dim_in, energies, sze, N_st, N_st_diag, Nint, dressing_state, converged) + + BEGIN_DOC + ! + ! non-sym Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! Initial guess vectors are not necessarily orthonormal + ! + END_DOC + + use bitmasks + + implicit none + + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint + integer, intent(in) :: dressing_state + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + logical, intent(out) :: converged + double precision, intent(out) :: energies(N_st_diag) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + + integer :: i, k, l + double precision :: f + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem + + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + + allocate(H_jj(sze)) + + H_jj(1) = diag_H_mat_elem(dets_in(1,1,1), Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze, H_jj, dets_in, Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i = 2, sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i), Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if(dressing_state > 0) then + do k = 1, N_st + do l = 1, N_st + f = overlap_states_inv(k,l) + + !do i = 1, N_det + ! H_jj(i) += f * dressing_delta(i,k) * psi_coef(i,l) + do i = 1, dim_in + H_jj(i) += f * dressing_delta(i,k) * u_in(i,l) + enddo + + enddo + enddo + endif + + call davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag, Nint, dressing_state, converged) + + deallocate(H_jj) + +end subroutine davidson_diag_nonsym_h + +! --- + +subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag_in, Nint, dressing_state, converged) + + BEGIN_DOC + ! + ! non-sym Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + ! Initial guess vectors are not necessarily orthonormal + ! + END_DOC + + include 'constants.include.F' + + use bitmasks + use mmap_module + + implicit none + + integer, intent(in) :: dim_in, sze, N_st, N_st_diag_in, Nint + integer, intent(in) :: dressing_state + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + double precision, intent(out) :: energies(N_st_diag_in) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + + logical :: disk_based + character*(16384) :: write_buffer + integer :: i, j, k, l, m + integer :: iter, N_st_diag, itertot, shift, shift2, itermax, istate + integer :: nproc_target + integer :: order(N_st_diag_in) + integer :: maxab + double precision :: rss + double precision :: cmax + double precision :: to_print(2,N_st) + double precision :: r1, r2 + double precision :: f + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:), u_tmp(:,:) + double precision, allocatable :: residual_norm(:) + double precision, allocatable :: U(:,:), overlap(:,:) + double precision, pointer :: W(:,:) + + double precision, external :: u_dot_u + + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + itertot = 0 + + if(state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + PROVIDE threshold_nonsym_davidson + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + maxab = max(N_det_alpha_unique, N_det_beta_unique) + 1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.0d0*dble(sze*m)*(N_st_diag*itermax) &! W + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 + else if(m==1 .and. disk_based_davidson) then + m = 0 + disk_based = .True. + itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of determinants') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6, '(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i = 1, N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if(disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax), & + u_tmp(N_st,N_st_diag)) + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. + + do k = N_st+1, N_st_diag + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + enddo + u_in(k,k) = u_in(k,k) + 10.d0 + enddo + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + + do while (.not.converged) + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + +! if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + if( (sze > 100000) .and. distributed_davidson ) then + call H_u_0_nstates_zmq (W(1,shift+1), U(1,shift+1), N_st_diag, sze) + else + call H_u_0_nstates_openmp(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + endif +! else +! ! Already computed in update below +! continue +! endif + + if(dressing_state > 0) then + + call dgemm( 'T', 'N', N_st, N_st_diag, sze, 1.d0 & + , psi_coef, size(psi_coef, 1), U(1, shift+1), size(U, 1) & + , 0.d0, u_tmp, size(u_tmp, 1)) + + do istate = 1, N_st_diag + do k = 1, N_st + do l = 1, N_st + f = overlap_states_inv(k,l) + do i = 1, sze + W(i,shift+istate) += f * dressing_delta(i,k) * u_tmp(l,istate) + enddo + enddo + enddo + enddo + + endif + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1)) + + ! Diagonalize h + ! --------------- + call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + + if (state_following) then + + overlap = -1.d0 + do k = 1, shift2 + do i = 1, shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k = 1, N_st + cmax = -1.d0 + do i = 1, N_st + if(overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i = 1, N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k = 1, N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k = 1, N_st + overlap(k,1) = lambda(k) + enddo + + endif + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1)) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W,1)) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + + if(k <= N_st) then + residual_norm(k) = u_dot_u(U(1,shift2+k), sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + endif + enddo + !$OMP END PARALLEL DO + + if((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! Check convergence + if(iter > 1) then + if(threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + endif + + do k = 1, N_st + if(residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1)) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1), 0.d0 & + , u_in, size(u_in, 1)) + + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + enddo + + + call nullify_small_elements(sze, N_st_diag, U, size(U, 1), threshold_davidson_pt2) + do k = 1, N_st_diag + do i = 1, sze + u_in(i,k) = U(i,k) + enddo + enddo + + do k = 1, N_st_diag + energies(k) = lambda(k) + enddo + write_buffer = '======' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + if(disk_based) then + ! Remove temp files + integer, external :: getUnitAndOpen + call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) + fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') + close(fd_w,status='delete') + else + deallocate(W) + endif + + deallocate ( & + residual_norm, & + U, overlap, & + h, y, s_tmp, & + lambda, & + u_tmp & + ) + FREE nthreads_davidson + +end subroutine davidson_diag_nonsym_hjj + +! --- + + + + + + + diff --git a/src/davidson/overlap_states.irp.f b/src/davidson/overlap_states.irp.f new file mode 100644 index 00000000..797d1210 --- /dev/null +++ b/src/davidson/overlap_states.irp.f @@ -0,0 +1,40 @@ + +! --- + + BEGIN_PROVIDER [ double precision, overlap_states, (N_states,N_states) ] +&BEGIN_PROVIDER [ double precision, overlap_states_inv, (N_states,N_states) ] + + BEGIN_DOC + ! + ! S_kl = ck.T x cl + ! = psi_coef(:,k).T x psi_coef(:,l) + ! + END_DOC + + implicit none + integer :: i + double precision :: o_tmp + + if(N_states == 1) then + + o_tmp = 0.d0 + do i = 1, N_det + o_tmp = o_tmp + psi_coef(i,1) * psi_coef(i,1) + enddo + overlap_states (1,1) = o_tmp + overlap_states_inv(1,1) = 1.d0 / o_tmp + + else + + call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & + , psi_coef, size(psi_coef, 1), psi_coef, size(psi_coef, 1) & + , 0.d0, overlap_states, size(overlap_states, 1) ) + + call get_inverse(overlap_states, N_states, N_states, overlap_states_inv, N_states) + + endif + +END_PROVIDER + +! --- + diff --git a/src/davidson_dressed/nonsym_diagonalize_ci.irp.f b/src/davidson_dressed/nonsym_diagonalize_ci.irp.f new file mode 100644 index 00000000..fa4b8b33 --- /dev/null +++ b/src/davidson_dressed/nonsym_diagonalize_ci.irp.f @@ -0,0 +1,188 @@ + +! --- + +BEGIN_PROVIDER [ double precision, CI_energy_nonsym_dressed, (N_states_diag) ] + + BEGIN_DOC + ! N_states lowest eigenvalues of the CI matrix + END_DOC + + implicit none + integer :: j + character*(8) :: st + + call write_time(6) + do j = 1, min(N_det, N_states_diag) + CI_energy_nonsym_dressed(j) = CI_electronic_energy_nonsym_dressed(j) + nuclear_repulsion + enddo + + do j = 1, min(N_det, N_states) + write(st, '(I4)') j + call write_double(6, CI_energy_nonsym_dressed(j), 'Energy of state '//trim(st)) + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, CI_electronic_energy_nonsym_dressed, (N_states_diag) ] +&BEGIN_PROVIDER [ double precision, CI_eigenvectors_nonsym_dressed, (N_det,N_states_diag) ] + + BEGIN_DOC + ! Eigenvectors/values of the CI matrix + END_DOC + + implicit none + logical :: converged + integer :: i, j, k + integer :: i_other_state + integer :: i_state + logical, allocatable :: good_state_array(:) + integer, allocatable :: index_good_state_array(:) + double precision, allocatable :: eigenvectors(:,:), eigenvalues(:) + + PROVIDE threshold_nonsym_davidson nthreads_davidson + + ! Guess values for the "N_states" states of the CI_eigenvectors_nonsym_dressed + do j = 1, min(N_states, N_det) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = psi_coef(i,j) + enddo + enddo + + do j = min(N_states, N_det)+1, N_states_diag + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = 0.d0 + enddo + enddo + + ! --- + + if(diag_algorithm == "Davidson") then + + ASSERT(n_states_diag .lt. n_states) + + do j = 1, min(N_states, N_det) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = psi_coef(i,j) + enddo + enddo + + converged = .False. + call davidson_diag_nonsym_h( psi_det, CI_eigenvectors_nonsym_dressed & + , size(CI_eigenvectors_nonsym_dressed, 1) & + , CI_electronic_energy_nonsym_dressed & + , N_det, min(N_det, N_states), min(N_det, N_states_diag), N_int, 1, converged ) + + else if(diag_algorithm == "Lapack") then + + allocate(eigenvectors(size(H_matrix_nonsym_dressed, 1),N_det)) + allocate(eigenvalues(N_det)) + + call diag_nonsym_right( N_det, H_matrix_nonsym_dressed, size(H_matrix_nonsym_dressed, 1) & + , eigenvectors, size(eigenvectors, 1), eigenvalues, size(eigenvalues, 1) ) + + CI_electronic_energy_nonsym_dressed(:) = 0.d0 + + ! Select the "N_states_diag" states of lowest energy + do j = 1, min(N_det, N_states_diag) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = eigenvectors(i,j) + enddo + CI_electronic_energy_nonsym_dressed(j) = eigenvalues(j) + enddo + + deallocate(eigenvectors, eigenvalues) + + ! --- --- + + endif + + ! --- + +END_PROVIDER + +! --- + +subroutine diagonalize_CI_nonsym_dressed() + + BEGIN_DOC + ! Replace the coefficients of the CI states by the coefficients of the + ! eigenstates of the CI matrix + END_DOC + + implicit none + integer :: i, j + + PROVIDE dressing_delta + + do j = 1, N_states + do i = 1, N_det + psi_coef(i,j) = CI_eigenvectors_nonsym_dressed(i,j) + enddo + enddo + + SOFT_TOUCH psi_coef + +end subroutine diagonalize_CI_nonsym_dressed + +! --- + +BEGIN_PROVIDER [ double precision, H_matrix_nonsym_dressed, (N_det,N_det) ] + + BEGIN_DOC + ! Dressed H with Delta_ij + END_DOC + + implicit none + integer :: i, j, l, k + double precision :: f + + H_matrix_nonsym_dressed(1:N_det,1:N_det) = h_matrix_all_dets(1:N_det,1:N_det) + + if(N_states == 1) then + +! !symmetric formula +! l = dressed_column_idx(1) +! f = 1.0d0/psi_coef(l,1) +! do i=1,N_det +! h_matrix_nonsym_dressed(i,l) += dressing_column_h(i,1) *f +! h_matrix_nonsym_dressed(l,i) += dressing_column_h(i,1) *f +! enddo + +! l = dressed_column_idx(1) +! f = 1.0d0 / psi_coef(l,1) +! do j = 1, N_det +! H_matrix_nonsym_dressed(j,l) += f * dressing_delta(j,1) +! enddo + + k = 1 + l = 1 + f = overlap_states_inv(k,l) + do j = 1, N_det + do i = 1, N_det + H_matrix_nonsym_dressed(i,j) = H_matrix_nonsym_dressed(i,j) + f * dressing_delta(i,k) * psi_coef(j,l) + enddo + enddo + + else + + do k = 1, N_states + do l = 1, N_states + f = overlap_states_inv(k,l) + + do j = 1, N_det + do i = 1, N_det + H_matrix_nonsym_dressed(i,j) = H_matrix_nonsym_dressed(i,j) + f * dressing_delta(i,k) * psi_coef(j,l) + enddo + enddo + + enddo + enddo + + endif + +END_PROVIDER + +! --- + diff --git a/src/davidson_keywords/EZFIO.cfg b/src/davidson_keywords/EZFIO.cfg new file mode 100644 index 00000000..6337b96f --- /dev/null +++ b/src/davidson_keywords/EZFIO.cfg @@ -0,0 +1,54 @@ +[threshold_davidson] +type: Threshold +doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false. +interface: ezfio,provider,ocaml +default: 1.e-10 + +[threshold_nonsym_davidson] +type: Threshold +doc: Thresholds of non-symetric Davidson's algorithm +interface: ezfio,provider,ocaml +default: 1.e-10 + +[davidson_sze_max] +type: Strictly_positive_int +doc: Number of micro-iterations before re-contracting +default: 15 +interface: ezfio,provider,ocaml + +[state_following] +type: logical +doc: If |true|, the states are re-ordered to match the input states +default: False +interface: ezfio,provider,ocaml + +[disk_based_davidson] +type: logical +doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is availabl +default: True +interface: ezfio,provider,ocaml + +[n_states_diag] +type: States_number +doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag +default: 4 +interface: ezfio,ocaml + +[n_det_max_full] +type: Det_number_max +doc: Maximum number of determinants where |H| is fully diagonalized +interface: ezfio,provider,ocaml +default: 1000 + +[threshold_davidson_from_pt2] +type: logical +doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2 +interface: ezfio,provider,ocaml +default: false + +[distributed_davidson] +type: logical +doc: If |true|, use the distributed algorithm +default: True +interface: ezfio,provider,ocaml + diff --git a/src/davidson_keywords/README.rst b/src/davidson_keywords/README.rst new file mode 100644 index 00000000..9567cdb1 --- /dev/null +++ b/src/davidson_keywords/README.rst @@ -0,0 +1,5 @@ +================= +davidson_keywords +================= + +Keywords used for Davidson algorithms. diff --git a/src/davidson/input.irp.f b/src/davidson_keywords/input.irp.f similarity index 79% rename from src/davidson/input.irp.f rename to src/davidson_keywords/input.irp.f index aba88ae9..4bd79036 100644 --- a/src/davidson/input.irp.f +++ b/src/davidson_keywords/input.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ integer, n_states_diag ] implicit none BEGIN_DOC @@ -8,11 +11,11 @@ BEGIN_PROVIDER [ integer, n_states_diag ] PROVIDE ezfio_filename if (mpi_master) then - call ezfio_has_davidson_n_states_diag(has) + call ezfio_has_davidson_keywords_n_states_diag(has) if (has) then - call ezfio_get_davidson_n_states_diag(n_states_diag) + call ezfio_get_davidson_keywords_n_states_diag(n_states_diag) else - print *, 'davidson/n_states_diag not found in EZFIO file' + print *, 'davidson_keywords/n_states_diag not found in EZFIO file' stop 1 endif n_states_diag = max(2,N_states * N_states_diag) @@ -37,3 +40,4 @@ BEGIN_PROVIDER [ integer, n_states_diag ] END_PROVIDER +! --- diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f new file mode 100644 index 00000000..fed2ba9b --- /dev/null +++ b/src/davidson_keywords/usef.irp.f @@ -0,0 +1,33 @@ +use bitmasks +use f77_zmq + + +! --- + +BEGIN_PROVIDER [ integer, nthreads_davidson ] + implicit none + BEGIN_DOC + ! Number of threads for Davidson + END_DOC + nthreads_davidson = nproc + character*(32) :: env + call getenv('QP_NTHREADS_DAVIDSON',env) + if (trim(env) /= '') then + read(env,*) nthreads_davidson + call write_int(6,nthreads_davidson,'Target number of threads for ') + endif +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ] + implicit none + BEGIN_DOC + ! Threshold of Davidson's algorithm, using PT2 as a guide + END_DOC + threshold_davidson_pt2 = threshold_davidson + +END_PROVIDER + +! --- + diff --git a/src/davidson_undressed/null_dressing_vector.irp.f b/src/davidson_undressed/null_dressing_vector.irp.f index faffe964..1989bb6d 100644 --- a/src/davidson_undressed/null_dressing_vector.irp.f +++ b/src/davidson_undressed/null_dressing_vector.irp.f @@ -1,10 +1,12 @@ BEGIN_PROVIDER [ double precision, dressing_column_h, (N_det,N_states) ] &BEGIN_PROVIDER [ double precision, dressing_column_s, (N_det,N_states) ] +&BEGIN_PROVIDER [ double precision, dressing_delta , (N_det,N_states) ] implicit none BEGIN_DOC ! Null dressing vectors END_DOC dressing_column_h(:,:) = 0.d0 dressing_column_s(:,:) = 0.d0 + dressing_delta (:,:) = 0.d0 END_PROVIDER diff --git a/src/determinants/spindeterminants.ezfio_config b/src/determinants/spindeterminants.ezfio_config index 39ccb82b..4fe1333a 100644 --- a/src/determinants/spindeterminants.ezfio_config +++ b/src/determinants/spindeterminants.ezfio_config @@ -9,8 +9,11 @@ spindeterminants psi_det_beta integer*8 (spindeterminants_n_int*spindeterminants_bit_kind/8,spindeterminants_n_det_beta) psi_coef_matrix_rows integer (spindeterminants_n_det) psi_coef_matrix_columns integer (spindeterminants_n_det) - psi_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) + psi_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) + psi_left_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) n_svd_coefs integer + n_svd_alpha integer + n_svd_beta integer psi_svd_alpha double precision (spindeterminants_n_det_alpha,spindeterminants_n_svd_coefs,spindeterminants_n_states) psi_svd_beta double precision (spindeterminants_n_det_beta,spindeterminants_n_svd_coefs,spindeterminants_n_states) psi_svd_coefs double precision (spindeterminants_n_svd_coefs,spindeterminants_n_states) diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f similarity index 100% rename from src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet rename to src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f From 002371f29c0040a740325829984915c755b34cf1 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 16 Mar 2023 09:01:34 +0100 Subject: [PATCH 003/337] save left_part for qmcchem correctly --- src/tc_bi_ortho/psi_left_qmc.irp.f | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tc_bi_ortho/psi_left_qmc.irp.f b/src/tc_bi_ortho/psi_left_qmc.irp.f index 25048f82..4e3b8e86 100644 --- a/src/tc_bi_ortho/psi_left_qmc.irp.f +++ b/src/tc_bi_ortho/psi_left_qmc.irp.f @@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det, implicit none integer :: k, l + !print *, ' providing psi_bitcleft_bilinear_matrix_values' + if(N_det .eq. 1) then do l = 1, N_states @@ -38,6 +40,8 @@ BEGIN_PROVIDER [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det, endif + !print *, ' psi_bitcleft_bilinear_matrix_values OK' + END_PROVIDER ! --- From e710d2623765dce23896f576cb0b1a37b393a9f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2023 19:23:07 +0100 Subject: [PATCH 004/337] Improve I/O on TC integrals --- src/non_h_ints_mu/grad_squared.irp.f | 70 ++++++--------- src/non_h_ints_mu/grad_squared_manu.irp.f | 76 +++++++--------- src/non_h_ints_mu/new_grad_tc.irp.f | 28 ++---- src/non_h_ints_mu/new_grad_tc_manu.irp.f | 103 ++++++++-------------- src/tc_keywords/EZFIO.cfg | 34 +++---- 5 files changed, 118 insertions(+), 193 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 7925fa7c..1fd39f6a 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -2,7 +2,7 @@ ! --- ! TODO : strong optmization : write the loops in a different way -! : for each couple of AO, the gaussian product are done once for all +! : for each couple of AO, the gaussian product are done once for all BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ] @@ -20,14 +20,14 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 - ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 + ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 ! = v1^2 x int2_grad1u2_grad2u2_j1b2 ! + -0.5 x (grad_1 v1)^2 x int2_u2_j1b2 ! + -1.0 X V1 x (grad_1 v1) \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b ] ! ! END_DOC - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z, r(3), delta, coef @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi call wall_time(time1) print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -151,7 +151,7 @@ END_PROVIDER ! ! deallocate(ac_mat) ! -!END_PROVIDER +!END_PROVIDER ! --- @@ -214,12 +214,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_nu call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -267,7 +267,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -297,12 +297,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_ call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -347,7 +347,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -370,26 +370,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="read") + read(11) tc_grad_square_ao close(11) else allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -401,11 +393,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -417,25 +409,25 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - + !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & + !!$OMP PRIVATE (i, j, k, l) & !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) !!$OMP DO SCHEDULE (static) ! do j = 1, ao_num ! do l = 1, ao_num ! do i = 1, ao_num ! do k = 1, ao_num - ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) ! enddo ! enddo ! enddo @@ -444,23 +436,17 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao !!$OMP END PARALLEL endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index cb9e15c4..66f3c693 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -17,29 +17,21 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="read") + read(11) tc_grad_square_ao_test close(11) else provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test - + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -51,11 +43,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -67,23 +59,23 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao_test = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao_test, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) !do i = 1, ao_num ! do j = 1, ao_num ! do k = i, ao_num - + ! do l = max(j,k), ao_num ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! end do - + ! !if (j.eq.k) then ! ! do l = j+1, ao_num ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) @@ -95,14 +87,14 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! ! enddo ! !endif - + ! enddo ! enddo !enddo !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! integer :: ii @@ -121,10 +113,10 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! print *, ' ii =', ii ! !$OMP END DO ! !$OMP END PARALLEL - + ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! do j = 1, ao_num @@ -144,24 +136,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -189,7 +175,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -205,7 +191,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -226,7 +212,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -246,7 +232,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -276,12 +262,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -328,12 +314,12 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -381,7 +367,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi call wall_time(time1) print*, ' Wall time for grad12_j12_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a15f690a..754e1240 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -36,16 +36,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao close(11) else @@ -89,18 +81,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 47b05e52..901e3048 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -3,7 +3,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po BEGIN_DOC ! - ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! @@ -15,9 +15,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po ! if J(r1,r2) = u12 x v1 x v2 ! ! int2_grad1_u12_ao_test(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) ! ! @@ -35,25 +35,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="read") + read(11) int2_grad1_u12_ao_test close(11) + else - + if(j1b_type .eq. 3) then do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + z = final_grid_points(3,ipoint) tmp0 = 0.5d0 * v_1b(ipoint) tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -87,24 +80,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -114,9 +101,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ ! ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! - ! This is obtained by integration by parts. + ! This is obtained by integration by parts. ! END_DOC @@ -131,40 +118,32 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="read") + read(11) tc_grad_and_lapl_ao_test close(11) else - provide int2_grad1_u12_ao_test - + provide int2_grad1_u12_ao_test + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid - + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) - + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) @@ -173,19 +152,19 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + ac_mat = 0.d0 do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , 1.d0, ac_mat, ao_num*ao_num) - + enddo deallocate(b_mat) - + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -199,29 +178,23 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(ac_mat) endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_and_lapl_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..3f34c088 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -6,7 +6,7 @@ default: False [comp_left_eigv] type: logical -doc: If |true|, computes also the left-eigenvector +doc: If |true|, computes also the left-eigenvector interface: ezfio,provider,ocaml default: False @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: True [pure_three_body_h_tc] type: logical @@ -30,13 +30,13 @@ default: False [core_tc_op] type: logical -doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) +doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) interface: ezfio,provider,ocaml default: False [full_tc_h_solver] type: logical -doc: If |true|, you diagonalize the full TC H matrix +doc: If |true|, you diagonalize the full TC H matrix interface: ezfio,provider,ocaml default: False @@ -60,11 +60,11 @@ default: 0.000005 [thresh_psi_r_norm] type: logical -doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. +doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. interface: ezfio,provider,ocaml default: False -[state_following_tc] +[state_following_tc] type: logical doc: If |true|, the states are re-ordered to match the input states default: False @@ -78,7 +78,7 @@ default: True [symetric_fock_tc] type: logical -doc: If |true|, using F+F^t as Fock TC +doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False @@ -126,7 +126,7 @@ default: 1.e-6 [maxovl_tc] type: logical -doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors +doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False @@ -152,7 +152,7 @@ default: 0. type: character*(32) doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] interface: ezfio,provider,ocaml -default: Simple +default: DIIS [im_thresh_tcscf] type: Threshold @@ -180,21 +180,15 @@ default: 1.e-6 [var_tc] type: logical -doc: If |true|, use VAR-TC +doc: If |true|, use VAR-TC interface: ezfio,provider,ocaml default: False -[read_tc_integ] -type: logical -doc: If |true|, read integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao +[io_tc_integ] +type: Disk_access +doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml -default: False - -[write_tc_integ] -type: logical -doc: If |true|, write integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao -interface: ezfio,provider,ocaml -default: False +default: None [debug_tc_pt2] type: integer From ba447be2e8627df7cbafba7e0ccbac23c2071552 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:12:02 +0100 Subject: [PATCH 005/337] added sort_wf --- src/tools/sort_wf.irp.f | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/tools/sort_wf.irp.f diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f new file mode 100644 index 00000000..2e0014ea --- /dev/null +++ b/src/tools/sort_wf.irp.f @@ -0,0 +1,7 @@ +program sort_wf + implicit none + read_wf = .true. + touch read_wf + call save_wavefunction + +end From 2ef2e8044d338c6c823d7f965f6a62a0c55640b8 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:24:26 +0100 Subject: [PATCH 006/337] added sort_wf.irp.f --- external/qp2-dependencies | 2 +- src/tools/sort_wf.irp.f | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 2e0014ea..038b24c8 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -2,6 +2,19 @@ program sort_wf implicit none read_wf = .true. touch read_wf - call save_wavefunction + call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i = 1, N_det + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + enddo end From 7aee93997a9e98a6ebe8798a8e3a00ca10f1119d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 13:08:09 +0100 Subject: [PATCH 007/337] added Hn.py --- scripts/Hn.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/Hn.py diff --git a/scripts/Hn.py b/scripts/Hn.py new file mode 100644 index 00000000..a4119145 --- /dev/null +++ b/scripts/Hn.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import sys +from math import * +arg = sys.argv +#f = open('data_dft','r') +n = int(sys.argv[1]) +r = float(sys.argv[2]) +f = open('H'+str(n)+'_'+str(r),'w') +string=str(n)+"\n" +f.write(string) +string="\n" +f.write(string) +for i in range(n): + x = r * cos(2.* i* pi/n) + y = r * sin(2.* i* pi/n) + z = 0. + string="H "+str(x)+" "+str(y)+" "+str(z)+"\n" + f.write(string) + +#lines = f.readlines() +#cipsi_dft= [] +# +#dissoc = [] +#dissoc.append(float(-76.0179223470363)) +#dissoc.append(float(-76.0592367866993)) +#dissoc.append(float(-76.0678739715659)) +#delta_e = [] +# +#for line in lines: +# data = line.split() +# if(len(data)>0): +# dft=float(data[1]) +# fci=float(data[2]) +# e=fci+dft +# cipsi_dft.append(e) +# +#print(*cipsi_dft,sep=" & ") +# +#for i in 0,1,2: +# delta_e.append(1000.*(dissoc[i] - cipsi_dft[i])) +# +#print(*delta_e,sep=" & ") +# From 436b8815807c3ad04dc5035244c6ed9e7c4f2580 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 11:21:19 +0200 Subject: [PATCH 008/337] added thresh_cycle --- src/ao_many_one_e_ints/grad2_jmu_manu.irp.f | 48 ++-- .../grad_lapl_jmu_manu.irp.f | 46 ++-- src/ao_many_one_e_ints/listj1b_sorted.irp.f | 42 ++-- src/non_h_ints_mu/total_tc_int.irp.f | 19 ++ src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/tc_scf.irp.f | 4 +- src/tc_scf/test_int.irp.f | 214 +++++++++++------- src/tools/sort_wf.irp.f | 2 +- 8 files changed, 212 insertions(+), 169 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 8e253d75..14170ede 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -38,7 +38,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b, & - !$OMP ao_overlap_abs,sq_pi_3_2) + !$OMP ao_overlap_abs,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -46,7 +46,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n r(3) = final_grid_points(3,ipoint) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -58,7 +58,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_gauss = overlap_gauss_r12_ao(r, expo_fit, i, j) int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss enddo @@ -81,8 +81,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef -! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! expo_fit, i, j, int_fit_v, n_points_final_grid) int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) @@ -145,14 +144,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, big_array,& - !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs) + !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs,thrsh_cycle_tc) ! allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -161,7 +160,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) -! if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -243,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & - !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) + !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -260,11 +258,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) coef_fit = coef_gauss_j_mu_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -278,7 +276,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -288,8 +286,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef_fit = coef_gauss_j_mu_x_2(i_fit) !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -350,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid @@ -369,7 +366,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -392,8 +389,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv) -! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version - if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle +! if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. thrsh_cycle_tc) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -470,13 +466,13 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, ao_abs_comb_b3_j1b, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10) cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc) cycle r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -489,10 +485,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) - if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.1.d-15) cycle +! if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.thrsh_cycle_tc) cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) tmp += coef_fit * int_fit @@ -507,7 +503,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -517,7 +513,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle +! if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.thrsh_cycle_tc)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit @@ -527,9 +523,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - if(expo_coef_1s .gt. 20.d0) cycle +! if(expo_coef_1s .gt. 20.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-08) cycle +! if(dabs(coef_tmp) .lt. 1d-08) cycle int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 5c9f81e9..66a2b961 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b, & !$OMP v_ij_erf_rk_cst_mu_j1b_test, mu_erf, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid @@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -110,7 +110,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & !$OMP x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,thrsh_cycle_tc) ! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO do ipoint = 1, n_points_final_grid @@ -120,7 +120,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -130,19 +130,11 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + ! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) -! if(ao_prod_center(1,j,i).ne.10000.d0)then -! ! approximate 1 - erf(mu r12) by a gaussian * 10 -! !DIR$ FORCEINLINE -! call gaussian_product(expo_erfc_mu_gauss,r, & -! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & -! factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle -! endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) @@ -216,7 +208,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -225,7 +217,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -234,11 +226,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + ! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) - if(ao_overlap_abs_grid(j,i).lt.1.d-15) cycle + ! if(ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc) cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -251,7 +243,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -259,9 +251,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle +! if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle +! if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -325,7 +317,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -334,7 +326,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -343,7 +335,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle expo_fit = expo_good_j_mu_1gauss int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += int_fit @@ -356,7 +348,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -364,9 +356,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, expo_fit = expo_good_j_mu_1gauss coef_fit = 1.d0 coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle + if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle + if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit ! enddo diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f index bf493fbb..9bcce449 100644 --- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f +++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f @@ -3,15 +3,16 @@ &BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b2_size = 0 + print*,'List_all_comb_b2_size = ',List_all_comb_b2_size +! pause do i = 1, ao_num do j = i, ao_num do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-15)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) @@ -24,7 +25,7 @@ dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b2_size(j,i) += 1 endif enddo @@ -40,6 +41,7 @@ list(i) = maxval(List_comb_thr_b2_size(:,i)) enddo max_List_comb_thr_b2_size = maxval(list) + print*,'max_List_comb_thr_b2_size = ',max_List_comb_thr_b2_size END_PROVIDER @@ -49,16 +51,15 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b2_j1b = 10000000.d0 do i = 1, ao_num do j = i, ao_num icount = 0 do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-12)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) int_j1b = 0.d0 @@ -70,7 +71,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b2_coef(icount,j,i) = coef List_comb_thr_b2_expo(icount,j,i) = beta @@ -98,17 +99,17 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b3_size = 0 + print*,'List_all_comb_b3_size = ',List_all_comb_b3_size do i = 1, ao_num do j = 1, ao_num do i_1s = 1, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) beta = List_all_comb_b3_expo (i_1s) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -118,7 +119,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b3_size(j,i) += 1 endif enddo @@ -144,9 +145,8 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b3_j1b = 10000000.d0 do i = 1, ao_num do j = 1, ao_num @@ -156,7 +156,7 @@ END_PROVIDER beta = List_all_comb_b3_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -166,7 +166,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b3_coef(icount,j,i) = coef List_comb_thr_b3_expo(icount,j,i) = beta @@ -177,15 +177,5 @@ END_PROVIDER enddo enddo -! do i = 1, ao_num -! do j = 1, i-1 -! do icount = 1, List_comb_thr_b3_size(j,i) -! List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j) -! List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j) -! List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j) -! enddo -! enddo -! enddo - END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2fd2719c..4f8dc74d 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -68,7 +68,26 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + implicit none + integer :: i, j, k, l + double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' + call wall_time(wall0) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + call wall_time(wall1) + print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 +END_PROVIDER BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..62b6d2bf 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: Thresholds on the energy for iterative Davidson used in TC interface: ezfio,provider,ocaml default: 1.e-5 +[thrsh_cycle_tc] +type: Threshold +doc: Thresholds to cycle the integrals with the envelop +interface: ezfio,provider,ocaml +default: 1.e-15 + [max_it_dav] type: integer doc: nb max of iteration in Davidson used in TC diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 85389f30..ae3b609b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + my_n_pt_r_grid = 60 + my_n_pt_a_grid = 110 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 0866cdaf..b9287d58 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -21,25 +21,22 @@ program test_ints touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid !! OK -!call routine_int2_u_grad1u_j1b2 -!! OK -!call routine_v_ij_erf_rk_cst_mu_j1b -!! OK +! call routine_int2_u_grad1u_j1b2 +! OK +! call routine_v_ij_erf_rk_cst_mu_j1b +! OK ! call routine_x_v_ij_erf_rk_cst_mu_j1b -!! OK -! call routine_v_ij_u_cst_mu_j1b - -!! OK -!call routine_int2_u2_j1b2 - -!! OK -!call routine_int2_u_grad1u_x_j1b2 - -!! OK +! OK +! call routine_int2_u2_j1b2 +! OK +! call routine_int2_u_grad1u_x_j1b2 +! OK ! call routine_int2_grad1u2_grad2u2_j1b2 ! call routine_int2_u_grad1u_j1b2 ! call test_total_grad_lapl ! call test_total_grad_square +! call test_int2_grad1_u12_ao_test +! call routine_v_ij_u_cst_mu_j1b_test ! call test_ao_tc_int_chemist ! call test_grid_points_ao ! call test_tc_scf @@ -53,12 +50,12 @@ program test_ints !call test_two_e_tc_non_hermit_integral() - call test_tc_grad_square_ao_test() - - PROVIDE TC_HF_energy VARTC_HF_energy - print *, ' TC_HF_energy = ', TC_HF_energy - print *, ' VARTC_HF_energy = ', VARTC_HF_energy +! call test_tc_grad_square_ao_test() +!!PROVIDE TC_HF_energy VARTC_HF_energy +!!print *, ' TC_HF_energy = ', TC_HF_energy +!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy + call test_old_ints end ! --- @@ -157,6 +154,9 @@ subroutine routine_int2_u_grad1u_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -169,20 +169,6 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -215,6 +201,9 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -228,20 +217,6 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -276,6 +251,10 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + + print*,'******' + print*,'******' + print*,'routine_x_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -290,20 +269,6 @@ subroutine routine_v_ij_u_cst_mu_j1b_test integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -336,6 +301,9 @@ subroutine routine_v_ij_u_cst_mu_j1b_test enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b_test' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -427,19 +395,6 @@ subroutine routine_int2_u2_j1b2 integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -473,6 +428,9 @@ subroutine routine_int2_u2_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u2_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -486,19 +444,6 @@ subroutine routine_int2_u_grad1u_x_j1b2 integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -534,6 +479,9 @@ subroutine routine_int2_u_grad1u_x_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_x_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -579,6 +527,9 @@ subroutine routine_v_ij_u_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -742,6 +693,9 @@ subroutine test_total_grad_lapl enddo enddo enddo + print*,'******' + print*,'******' + print*,' test_total_grad_lapl' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -767,6 +721,9 @@ subroutine test_total_grad_square enddo enddo enddo + print*,'******' + print*,'******' + print*,'test_total_grad_square' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -1057,3 +1014,86 @@ end ! --- + +subroutine test_old_ints + implicit none + integer :: i,j,k,l + double precision :: old, new, contrib, get_ao_tc_sym_two_e_pot + double precision :: integral_sym , integral_nsym,accu + PROVIDE ao_tc_sym_two_e_pot_in_map + accu = 0.d0 + do j = 1, ao_num + do l= 1, ao_num + do i = 1, ao_num + do k = 1, ao_num +! integral_sym = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map) + ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis +! integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) +! old = integral_sym + integral_nsym +! old = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + new = ao_tc_int_chemist_test(k,i,l,j) + old = ao_tc_int_chemist_no_cycle(k,i,l,j) + contrib = dabs(old - new) + if(contrib.gt.1.d-6)then + print*,'problem !!' + print*,i,j,k,l + print*,old, new, contrib + endif + accu += contrib + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'in test_old_ints' + print*,'accu = ',accu/dble(ao_num**4) + +end + +subroutine test_int2_grad1_u12_ao_test + implicit none + integer :: i,j,ipoint,m,k,l + double precision :: weight,accu_relat, accu_abs, contrib + double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) + allocate(array(ao_num, ao_num, ao_num, ao_num)) + array = 0.d0 + allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) + array_ref = 0.d0 + do m = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + enddo + enddo + enddo + enddo + enddo + enddo + + accu_relat = 0.d0 + accu_abs = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) + accu_abs += contrib + if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then + accu_relat += contrib/dabs(array_ref(j,i,l,k)) + endif + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'test_int2_grad1_u12_ao_test' + print*,'accu_abs = ',accu_abs/dble(ao_num)**4 + print*,'accu_relat = ',accu_relat/dble(ao_num)**4 +end diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 038b24c8..95b1a964 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -14,7 +14,7 @@ subroutine routine output=trim(ezfio_filename)//'.wf_sorted' i_unit_output = getUnitAndOpen(output,'w') do i = 1, N_det - write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1))/dabs(psi_coef_sorted(1,1)) enddo end From db7384c18e2b10abb591eda58dda5de7edb82331 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 28 Mar 2023 12:02:28 +0200 Subject: [PATCH 009/337] few modif for excisted states --- src/tc_bi_ortho/tc_bi_ortho.irp.f | 79 +++++++++++++++++++-------- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 77 ++++++++++++++++---------- 2 files changed, 103 insertions(+), 53 deletions(-) diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2d51f6f0..b0bd6be8 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -9,9 +9,13 @@ program tc_bi_ortho my_n_pt_a_grid = 50 read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine_diag - call save_tc_bi_ortho_wavefunction + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call routine_diag() + call save_tc_bi_ortho_wavefunction() end subroutine test @@ -28,26 +32,53 @@ subroutine test end -subroutine routine_diag - implicit none -! provide eigval_right_tc_bi_orth -! provide overlap_bi_ortho -! provide htilde_matrix_elmt_bi_ortho - integer ::i,j - print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'Left/right eigenvectors' - do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) - enddo +subroutine routine_diag() + + implicit none + integer :: i, j + double precision :: dE + + ! provide eigval_right_tc_bi_orth + ! provide overlap_bi_ortho + ! provide htilde_matrix_elmt_bi_ortho + + if(N_states .eq. 1) then + + print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) + print*,'e_tc_left_right = ',e_tc_left_right + print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 + print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth + print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single + print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double + print*,'***' + print*,'e_corr_bi_orth = ',e_corr_bi_orth + print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj + print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth + print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'Left/right eigenvectors' + do i = 1,N_det + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + enddo + + else + + print*,'eigval_right_tc_bi_orth : ' + do i = 1, N_states + print*, i, eigval_right_tc_bi_orth(i) + enddo + + print*,'' + print*,'******************************************************' + print*,'Excitation energies (au) (eV)' + do i = 2, N_states + dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) + print*, i, dE, dE/0.0367502d0 + enddo + print*,'' + + endif + end + + diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f2cbb637..dc010701 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -47,34 +47,36 @@ end integer :: i, idx_dress, j, istate logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l - double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + integer, allocatable :: iorder(:) + double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:), leigvec_tc_bi_orth_tmp(:,:), eigval_right_tmp(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:), Stmp(:,:) PROVIDE N_det N_int - if(n_det.le.N_det_max_full)then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& - leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& - n_real_tc_bi_orth_eigval_right,eigval_right_tmp) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - integer, allocatable :: iorder(:) - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + if(n_det .le. N_det_max_full) then + + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det), leigvec_tc_bi_orth_tmp(N_det,N_det), eigval_right_tmp(N_det)) + + call non_hrmt_real_diag( N_det, htilde_matrix_elmt_bi_ortho & + , leigvec_tc_bi_orth_tmp, reigvec_tc_bi_orth_tmp, n_real_tc_bi_orth_eigval_right, eigval_right_tmp) + + allocate(coef_hf_r(N_det), coef_hf_l(N_det), iorder(N_det)) + do i = 1, N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) enddo - call dsort(coef_hf_r,iorder,N_det) + call dsort(coef_hf_r, iorder, N_det) igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det + print*, 'igood_r, coef_hf_r = ', igood_r, coef_hf_r(1) + do i = 1, N_det iorder(i) = i coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) enddo - call dsort(coef_hf_l,iorder,N_det) + call dsort(coef_hf_l, iorder, N_det) igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + print*, 'igood_l, coef_hf_l = ', igood_l, coef_hf_l(1) - if(igood_r.ne.igood_l.and.igood_r.ne.1)then + if(igood_r .ne. igood_l .and. igood_r .ne. 1)then print *,'' print *,'Warning, the left and right eigenvectors are "not the same" ' print *,'Warning, the ground state is not dominated by HF...' @@ -83,31 +85,48 @@ end print *,'State with largest LEFT coefficient of HF ',igood_l print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) endif - if(state_following_tc)then + + if(state_following_tc) then + print *,'Following the states with the largest coef on HF' print *,'igood_r,igood_l',igood_r,igood_l i= igood_r eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! print*,reigvec_tc_bi_orth(j,1) enddo i= igood_l eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) enddo + else - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) - enddo - enddo + + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + + ! check bi-orthogonality + allocate(Stmp(N_states,N_states)) + call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & + , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + print *, ' overlap matrix between states:' + do i = 1, N_states + write(*,'(1000(F16.10,X))') Stmp(i,:) + enddo + deallocate(Stmp) + endif - else + + else + double precision, allocatable :: H_jj(:),vec_tmp(:,:) external htc_bi_ortho_calc_tdav external htcdag_bi_ortho_calc_tdav From a38255cec989f639231d3254784787e725961be1 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 12:43:15 +0200 Subject: [PATCH 010/337] added Hn.py --- external/qp2-dependencies | 2 +- scripts/Hn.py | 2 +- scripts/get_fci_tc_conv.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100755 scripts/get_fci_tc_conv.sh diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/scripts/Hn.py b/scripts/Hn.py index a4119145..0f938510 100644 --- a/scripts/Hn.py +++ b/scripts/Hn.py @@ -5,7 +5,7 @@ arg = sys.argv #f = open('data_dft','r') n = int(sys.argv[1]) r = float(sys.argv[2]) -f = open('H'+str(n)+'_'+str(r),'w') +f = open('H'+str(n)+'_'+str(r)+'.xyz','w') string=str(n)+"\n" f.write(string) string="\n" diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh new file mode 100755 index 00000000..643f3ac0 --- /dev/null +++ b/scripts/get_fci_tc_conv.sh @@ -0,0 +1,2 @@ +file=$1 +grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From 7e617bee131ccd63c90394e26ae82acf454c166e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Mar 2023 13:24:28 +0200 Subject: [PATCH 011/337] COLLAPSE --- external/qp2-dependencies | 2 +- src/bi_ort_ints/three_body_ijmk.irp.f | 12 ++++++------ src/bi_ort_ints/three_body_ijmkl.irp.f | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..ce14f57b 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 853972f7..5afd49ab 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -27,7 +27,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -74,7 +74,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -121,7 +121,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -168,7 +168,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -214,7 +214,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -261,7 +261,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd5c4977..ae4c9bd5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -75,7 +75,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -124,7 +124,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -173,7 +173,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -222,7 +222,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -271,7 +271,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num From 6d7d7ccfb4420556a341dd454caaa759bb721927 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 29 Mar 2023 15:47:59 +0200 Subject: [PATCH 012/337] set the threshold_cycle to 1e-10 by default --- src/bi_ort_ints/total_twoe_pot.irp.f | 1 + src/tc_keywords/EZFIO.cfg | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 78047d1b..f5f5959a 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE ao_tc_sym_two_e_pot_in_map + !!! TODO :: OPENMP do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 62b6d2bf..e65a1400 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -50,7 +50,7 @@ default: 1.e-5 type: Threshold doc: Thresholds to cycle the integrals with the envelop interface: ezfio,provider,ocaml -default: 1.e-15 +default: 1.e-10 [max_it_dav] type: integer From e076975f90ed73ece35cf7ead843b126c4e8847f Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 31 Mar 2023 10:32:02 +0200 Subject: [PATCH 013/337] few modif in save tc-wf for qmcchem --- .../save_bitcpsileft_for_qmcchem.irp.f | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f index eb812401..efa4aa2c 100644 --- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f +++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f @@ -1,5 +1,18 @@ program save_bitcpsileft_for_qmcchem + implicit none + + read_wf = .True. + TOUCH read_wf + + call main() + +end + + +subroutine main() + + implicit none integer :: iunit logical :: exists double precision :: e_ref @@ -46,7 +59,7 @@ program save_bitcpsileft_for_qmcchem close(iunit) -end +end subroutine main ! -- @@ -61,12 +74,18 @@ subroutine write_lr_spindeterminants() PROVIDE psi_bitcleft_bilinear_matrix_values + print *, ' saving left determinants' + print *, ' assuming save_for_qmc called before to save right determinants' + print *, ' N_det = ', N_det + print *, ' N_states = ', N_states + allocate(buffer(N_det,N_states)) do l = 1, N_states do k = 1, N_det buffer(k,l) = psi_bitcleft_bilinear_matrix_values(k,l) enddo enddo + call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer) deallocate(buffer) From 0b431b519271b82bbc56b5142b301bc3f187e4c6 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 1 Apr 2023 13:24:48 +0200 Subject: [PATCH 014/337] added NEED for dav --- src/davidson_keywords/NEED | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/davidson_keywords/NEED diff --git a/src/davidson_keywords/NEED b/src/davidson_keywords/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/davidson_keywords/NEED @@ -0,0 +1 @@ +ezfio_files From 504d46f6934e07a7e8ffb6152d3e43e01b9a963e Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 1 Apr 2023 18:41:47 +0200 Subject: [PATCH 015/337] added print_mos --- external/qp2-dependencies | 2 +- src/kohn_sham/print_mos.irp.f | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/kohn_sham/print_mos.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f new file mode 100644 index 00000000..5e728444 --- /dev/null +++ b/src/kohn_sham/print_mos.irp.f @@ -0,0 +1,30 @@ +program print_mos + implicit none + integer :: i,nx + double precision :: r(3), xmax, dx, accu + double precision, allocatable :: mos_array(:) + double precision:: alpha,envelop + allocate(mos_array(mo_num)) + xmax = 5.d0 + nx = 1000 + dx=xmax/dble(nx) + r = 0.d0 + alpha = 0.5d0 + do i = 1, nx + call give_all_mos_at_r(r,mos_array) + accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 + accu = dsqrt(accu) + envelop = (1.d0 - dexp(-alpha * r(3)**2)) + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + r(3) += dx + enddo + +end + +double precision function f_mu(x) + implicit none + double precision, intent(in) :: x + + + +end From 8e031bfb460e0bc3c15bfaeaa167506191d5b572 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 1 Apr 2023 22:05:34 +0200 Subject: [PATCH 016/337] tc provide problem solved --- src/tc_bi_ortho/tc_bi_ortho.irp.f | 2 +- src/tc_bi_ortho/tc_hmat.irp.f | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index b0bd6be8..c11164b0 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -69,7 +69,7 @@ subroutine routine_diag() print*,'' print*,'******************************************************' - print*,'Excitation energies (au) (eV)' + print*,'TC Excitation energies (au) (eV)' do i = 2, N_states dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) print*, i, dE, dE/0.0367502d0 diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 44e27e7c..3353d3e7 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -12,6 +12,11 @@ double precision :: hmono,htwoe,hthree,htot PROVIDE N_int + + i = 1 + j = 1 + call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det From 450a80e3078d695c931386044ddcd1451b1eaacd Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 3 Apr 2023 14:32:25 +0200 Subject: [PATCH 017/337] fixed stupid problem in pt_charges --- src/nuclei/write_pt_charges.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index 6dbcd5b8..f5007090 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -21,7 +21,7 @@ def mv_in_ezfio(ezfio,tmp): os.system(cmdmv) -# Getting the EZFIO + ##Getting the EZFIO EZFIO=sys.argv[1] EZFIO=EZFIO.replace("/", "") print(EZFIO) @@ -66,8 +66,20 @@ zip_in_ezfio(EZFIO,tmp) tmp="pts_charge_coord" fcoord = open(tmp,'w') fcoord.write(" 2\n") -fcoord.write(" "+str(n_charges)+' 3\n') -#fcoord.write(" "+' 3 '+str(n_charges)+' \n') +if(n_charges < 10): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000000): + fcoord.write(" "+str(n_charges)+' 3\n') for i in range(n_charges): fcoord.write(' '+coord_x[i]+'\n') for i in range(n_charges): From ad893e4df4c3d31deb809e9081507d9dab39b7ae Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 3 Apr 2023 14:55:02 +0200 Subject: [PATCH 018/337] TC-orthog problem: ok --- src/tc_bi_ortho/print_tc_energy.irp.f | 47 +++++++++++++++++++++++++++ src/tc_bi_ortho/psi_r_l_prov.irp.f | 13 ++++++-- src/tc_bi_ortho/tc_bi_ortho.irp.f | 44 +++++++++++++++++++++++-- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 40 +++++++++++++++++++---- 4 files changed, 132 insertions(+), 12 deletions(-) create mode 100644 src/tc_bi_ortho/print_tc_energy.irp.f diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f new file mode 100644 index 00000000..c9f8cecb --- /dev/null +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -0,0 +1,47 @@ +program print_tc_energy + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call write_tc_energy +end + +subroutine write_tc_energy() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, htot + double precision :: E_TC, O_TC + + do k = 1, n_states + + E_TC = 0.d0 + do i = 1, N_det + do j = 1, N_det + call hmat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, htot) + E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot + !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot + enddo + enddo + + O_TC = 0.d0 + do i = 1, N_det + O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) + !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) + enddo + + print *, ' state :', k + print *, " E_TC = ", E_TC + print *, " O_TC = ", O_TC + + enddo + +end + diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index 521acff5..05d132d5 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -136,7 +136,7 @@ BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef) +subroutine save_tc_wavefunction_general(ndet, nstates, psidet, sze, dim_psicoef, psilcoef, psircoef) implicit none BEGIN_DOC ! Save the wave function into the |EZFIO| file @@ -195,9 +195,16 @@ end subroutine save_tc_bi_ortho_wavefunction implicit none if(save_sorted_tc_wf)then - call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho) + call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, N_det & + , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho & + , psi_r_coef_sorted_bi_ortho ) + !call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, size(psi_det_sorted_tc, 3) & + ! , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho & + ! , psi_r_coef_sorted_bi_ortho ) else - call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) + call save_tc_wavefunction_general( N_det, N_states, psi_det, size(psi_det, 3) & + , size(psi_l_coef_bi_ortho, 1), psi_l_coef_bi_ortho & + , psi_r_coef_bi_ortho ) endif call routine_save_right_bi_ortho end diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index c11164b0..ef2e5659 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -1,8 +1,14 @@ program tc_bi_ortho - implicit none + BEGIN_DOC -! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! END_DOC + + implicit none + print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 @@ -15,6 +21,7 @@ program tc_bi_ortho print*, ' nb of det = ', N_det call routine_diag() + call write_tc_energy() call save_tc_bi_ortho_wavefunction() end @@ -82,3 +89,36 @@ end +subroutine write_tc_energy() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, htot + double precision :: E_TC, O_TC + + do k = 1, n_states + + E_TC = 0.d0 + do i = 1, N_det + do j = 1, N_det + htot = htilde_matrix_elmt_bi_ortho(j,i) + !call hmat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, htot) + E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot + !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot + enddo + enddo + + O_TC = 0.d0 + do i = 1, N_det + O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) + !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) + enddo + + print *, ' state :', k + print *, " E_TC = ", E_TC + print *, " O_TC = ", O_TC + + enddo + +end + diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index dc010701..fa9d9f5c 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -44,7 +44,7 @@ end END_DOC implicit none - integer :: i, idx_dress, j, istate + integer :: i, idx_dress, j, istate, k logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l integer, allocatable :: iorder(:) @@ -168,13 +168,39 @@ end deallocate(H_jj) endif + call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) - print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) - norm_ground_left_right_bi_orth = 0.d0 - do j = 1, N_det - norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) - enddo - print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) + do i = 1, N_states + norm_ground_left_right_bi_orth = 0.d0 + do j = 1, N_det + norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,i) * reigvec_tc_bi_orth(j,i) + enddo + print*,'norm l/r = ',norm_ground_left_right_bi_orth + enddo + + ! --- + + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + + do k = 1, N_states + do i = 1, N_det + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + + do k = 1, N_states + do i = 1, N_det + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + + deallocate(buffer) + + ! --- END_PROVIDER From 5aed62450e000fa8d1840f4287559c2b314241a7 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 5 Apr 2023 15:59:38 +0200 Subject: [PATCH 019/337] print tc energy: OK --- src/tc_bi_ortho/print_tc_energy.irp.f | 32 -------------------------- src/tc_bi_ortho/tc_bi_ortho.irp.f | 33 --------------------------- src/tc_bi_ortho/tc_utils.irp.f | 32 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 65 deletions(-) create mode 100644 src/tc_bi_ortho/tc_utils.irp.f diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index c9f8cecb..e5f123a7 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -13,35 +13,3 @@ program print_tc_energy call write_tc_energy end -subroutine write_tc_energy() - - implicit none - integer :: i, j, k - double precision :: hmono, htwoe, htot - double precision :: E_TC, O_TC - - do k = 1, n_states - - E_TC = 0.d0 - do i = 1, N_det - do j = 1, N_det - call hmat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, htot) - E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot - !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot - enddo - enddo - - O_TC = 0.d0 - do i = 1, N_det - O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) - !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) - enddo - - print *, ' state :', k - print *, " E_TC = ", E_TC - print *, " O_TC = ", O_TC - - enddo - -end - diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index ef2e5659..98b83329 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -89,36 +89,3 @@ end -subroutine write_tc_energy() - - implicit none - integer :: i, j, k - double precision :: hmono, htwoe, htot - double precision :: E_TC, O_TC - - do k = 1, n_states - - E_TC = 0.d0 - do i = 1, N_det - do j = 1, N_det - htot = htilde_matrix_elmt_bi_ortho(j,i) - !call hmat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, htot) - E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot - !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot - enddo - enddo - - O_TC = 0.d0 - do i = 1, N_det - O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) - !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) - enddo - - print *, ' state :', k - print *, " E_TC = ", E_TC - print *, " O_TC = ", O_TC - - enddo - -end - diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f new file mode 100644 index 00000000..92e8639d --- /dev/null +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -0,0 +1,32 @@ + +subroutine write_tc_energy() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: E_TC, O_TC + + do k = 1, n_states + + E_TC = 0.d0 + do i = 1, N_det + do j = 1, N_det + !htot = htilde_matrix_elmt_bi_ortho(i,j) + call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot + enddo + enddo + + O_TC = 0.d0 + do i = 1, N_det + O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) + enddo + + print *, ' state :', k + print *, " E_TC = ", E_TC / O_TC + print *, " O_TC = ", O_TC + + enddo + +end + From 2e8ced0eef799853a03b0c08062b14089498f587 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 10 Apr 2023 16:11:52 +0200 Subject: [PATCH 020/337] overlap with dgemm --- src/bi_ortho_mos/overlap.irp.f | 92 ++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f index d7f45c94..ff5d5c84 100644 --- a/src/bi_ortho_mos/overlap.irp.f +++ b/src/bi_ortho_mos/overlap.irp.f @@ -12,32 +12,27 @@ double precision :: accu_d, accu_nd double precision, allocatable :: tmp(:,:) - ! TODO : re do the DEGEMM +! overlap_bi_ortho = 0.d0 +! do i = 1, mo_num +! do k = 1, mo_num +! do m = 1, ao_num +! do n = 1, ao_num +! overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i) +! enddo +! enddo +! enddo +! enddo - overlap_bi_ortho = 0.d0 - do i = 1, mo_num - do k = 1, mo_num - do m = 1, ao_num - do n = 1, ao_num - overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i) - enddo - enddo - enddo - enddo - -! allocate( tmp(mo_num,ao_num) ) -! -! ! tmp <-- L.T x S_ao -! call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & -! , mo_l_coef, size(mo_l_coef, 1), ao_overlap, size(ao_overlap, 1) & -! , 0.d0, tmp, size(tmp, 1) ) -! -! ! S <-- tmp x R -! call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & -! , tmp, size(tmp, 1), mo_r_coef, size(mo_r_coef, 1) & -! , 0.d0, overlap_bi_ortho, size(overlap_bi_ortho, 1) ) -! -! deallocate( tmp ) + allocate( tmp(mo_num,ao_num) ) + ! tmp <-- L.T x S_ao + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + ! S <-- tmp x R + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) & + , 0.d0, overlap_bi_ortho(1,1), size(overlap_bi_ortho, 1) ) + deallocate(tmp) do i = 1, mo_num overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i) @@ -84,20 +79,41 @@ END_PROVIDER END_DOC implicit none - integer :: i, j, p, q + integer :: i, j, p, q + double precision, allocatable :: tmp(:,:) - overlap_mo_r = 0.d0 - overlap_mo_l = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) - overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) - enddo - enddo - enddo - enddo + !overlap_mo_r = 0.d0 + !overlap_mo_l = 0.d0 + !do i = 1, mo_num + ! do j = 1, mo_num + ! do p = 1, ao_num + ! do q = 1, ao_num + ! overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) + ! overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) + ! enddo + ! enddo + ! enddo + !enddo + + allocate( tmp(mo_num,ao_num) ) + + tmp = 0.d0 + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_r_coef(1,1), size(mo_r_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) & + , 0.d0, overlap_mo_r(1,1), size(overlap_mo_r, 1) ) + + tmp = 0.d0 + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_l_coef(1,1), size(mo_l_coef, 1) & + , 0.d0, overlap_mo_l(1,1), size(overlap_mo_l, 1) ) + + deallocate(tmp) END_PROVIDER From 04715abc640010ce15e5096b504baa5425bb36ab Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:12:32 +0200 Subject: [PATCH 021/337] beginning to work on s2 for TC --- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 739 ++++++++++++++++++ .../{u0_h_u0.irp.f => h_tc_u0.irp.f} | 3 - src/tc_bi_ortho/tc_bi_ortho.irp.f | 1 - src/tc_bi_ortho/tc_h_eigvectors.irp.f | 2 - src/tc_scf/tc_scf.irp.f | 4 +- 5 files changed, 741 insertions(+), 8 deletions(-) create mode 100644 src/tc_bi_ortho/h_tc_s2_u0.irp.f rename src/tc_bi_ortho/{u0_h_u0.irp.f => h_tc_u0.irp.f} (99%) diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f new file mode 100644 index 00000000..5a9f5e69 --- /dev/null +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -0,0 +1,739 @@ +subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + +subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + + +subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical, intent(in) :: do_right + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1, do_right) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_tc_s2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (2) + call H_tc_s2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (3) + call H_tc_s2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (4) + call H_tc_s2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case default + call H_tc_s2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + end select +end +BEGIN_TEMPLATE + +subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! ------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev,hmono, htwoe, hthree, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep ! Loop over all determinants (/!\ not in psidet order) + + krow = psi_bilinear_matrix_rows(k_a) ! Index of alpha part of determinant k_a + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) ! Index of beta part of determinant k_a + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! -> Here, tmp_det is determinant k_a + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + + ! tmp_det2 is a single excitation of tmp_det in the beta spin + ! the alpha part is not defined yet + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + ! rows : | 1 2 3 4 | 1 3 4 6 | .... | 1 2 4 5 | + ! cols : | 1 1 1 1 | 2 2 2 2 | .... | 8 8 8 8 | + ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 | + ! ^ ^ + ! | | + ! l_a N_det + ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol + + ! Below we identify all the determinants with the same beta part + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + ! Get all single excitations from tmp_det(1,1) to buffer(1,?) + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! ---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) +! call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem + double precision :: hmono, htwoe, hthree + +! hij = diag_H_mat_elem(tmp_det,$N_int) + call diag_htilde_mu_mat_fock_bi_ortho ($N_int, tmp_det, hmono, htwoe, hthree, hij) + call get_s2(tmp_det,tmp_det,$N_int,sij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/h_tc_u0.irp.f similarity index 99% rename from src/tc_bi_ortho/u0_h_u0.irp.f rename to src/tc_bi_ortho/h_tc_u0.irp.f index e107ad88..5e6150ea 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_u0.irp.f @@ -93,9 +93,6 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) -! provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e -! provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell -! provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2d51f6f0..bd0b1ef5 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -3,7 +3,6 @@ program tc_bi_ortho BEGIN_DOC ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC - print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f2cbb637..11a14b41 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -25,8 +25,6 @@ subroutine diagonalize_CI_tc psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo -! psi_energy(1:N_states) = CI_electronic_energy(1:N_states) -! psi_s2(1:N_states) = CI_s2(1:N_states) SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index ae3b609b..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 60 - my_n_pt_a_grid = 110 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid From 709741668e602b289b4c34e7593b4897870217af Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 10 Apr 2023 16:17:40 +0200 Subject: [PATCH 022/337] bug fixed in print tc integrals --- src/non_h_ints_mu/new_grad_tc.irp.f | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 754e1240..a6673252 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -305,16 +305,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="read") + read(11) tc_grad_and_lapl_ao close(11) else @@ -374,18 +366,12 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_and_lapl_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) From 042159a13497dc73129e8360cd6b47611acb5c4f Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:50:55 +0200 Subject: [PATCH 023/337] added h_p in davidson diagonalization hS2 --- src/davidson/diagonalization_hs2_dressed.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..8117f320 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,7 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) - y = h +! y = h + y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From fbe8c4b60f349322ab8deb2b34007c585ed5587b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 17:53:09 +0200 Subject: [PATCH 024/337] working on S2 for TC: davidson with S2 penalty seems to work --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 547 ++++++++++++++++++++++++++++++ src/tc_bi_ortho/test_s2_tc.irp.f | 157 +++++++++ 2 files changed, 704 insertions(+) create mode 100644 src/tc_bi_ortho/dav_h_tc_s2.irp.f create mode 100644 src/tc_bi_ortho/test_s2_tc.irp.f diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f new file mode 100644 index 00000000..02aa712b --- /dev/null +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -0,0 +1,547 @@ + +! --- + +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in + double precision, intent(in) :: H_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(3,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), h_p(:,:), s2(:) + real, allocatable :: y_s(:,:) + double precision, allocatable :: s_(:,:), s_tmp(:,:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:), S_d(:,:) + + double precision, allocatable :: W(:,:) + real, pointer :: S(:,:) + + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda + + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.5d0*dble(sze*m)*(N_st_diag*itermax) &! W, S + + 4.5d0*(N_st_diag*itermax)**2 &! h,y,y_s,s_, s_tmp + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + ! --- + + + allocate( W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + S_d(sze,N_st_diag), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + h_p(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st), & + s2(N_st_diag*itermax), & + y_s(N_st_diag*itermax,N_st_diag*itermax) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U +! call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call hcalc(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) + else + + ! Already computed in update below + continue + endif + ! Compute s_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2) + do j=1,shift2 + do i=1,shift2 + s_(i,j) = 0.d0 + do k=1,sze + s_(i,j) = s_(i,j) + U(k,i) * dble(S(k,j)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + ! Penalty method + ! -------------- + + if (s2_eig) then + h_p = s_ + do k=1,shift2 + h_p(k,k) = h_p(k,k) - expected_s2 + enddo + if (only_expected_s2) then + alpha = 0.1d0 + h_p = h + alpha*h_p + else + alpha = 0.0001d0 + h_p = h + alpha*h_p + endif + else + h_p = h + alpha = 0.d0 + endif + + ! Diagonalize h y = lambda y + ! --------------------------- + call diag_nonsym_right(shift2, h_p(1,1), size(h_p, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + do k = 1, N_st_diag +! print*,'lambda(k) before = ',lambda(k) + lambda(k) = 0.d0 + do l = 1, shift2 + do m = 1, shift2 + lambda(k) += y(m,k) * h(m,l) * y(l,k) + enddo + enddo +! print*,'lambda(k) new = ',lambda(k) + enddo + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + enddo + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = s2(l) + to_print(3,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:3,1:N_st) + endif + + ! Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f new file mode 100644 index 00000000..a5241fe3 --- /dev/null +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -0,0 +1,157 @@ +program test_tc + implicit none + read_wf = .True. + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call routine_test_s2 + call routine_test_s2_davidson +end + +subroutine routine_test_s2 + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking first the Left ' + do_right = .False. + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + print*,'Checking then the right ' + do_right = .True. + do i = 1, sze + u_0(i,1) = psi_r_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + +end + +subroutine routine_test_s2_davidson + implicit none + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + integer :: i,istate + logical :: converged + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + enddo + ! Preparing the left-eigenvector + print*,'Computing the left-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + print*,'energies = ',energies + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + integer :: sze,N_st + logical :: do_right + sze = N_det + N_st = 1 + do_right = .False. + allocate(s_0_new(N_det,1),v_0_new(N_det,1)) + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + double precision :: accu_e_0, accu_s_0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + + ! Preparing the right-eigenvector + print*,'Computing the right-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + print*,'energies = ',energies + sze = N_det + N_st = 1 + do_right = .True. + v_0_new = 0.d0 + s_0_new = 0.d0 + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + +end From 367abb3d70a452eec981febbd1a5999f91be9bd7 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 19:37:54 +0200 Subject: [PATCH 025/337] S2 OK in TC --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 4 +- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 30 ++++ src/tc_bi_ortho/tc_h_eigvectors.irp.f | 201 +++++++++++++++++++------- src/tc_bi_ortho/test_s2_tc.irp.f | 14 +- 4 files changed, 189 insertions(+), 60 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index 02aa712b..c0ea054a 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) use mmap_module @@ -30,6 +30,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: s2_out(N_st) external hcalc character*(16384) :: write_buffer @@ -528,6 +529,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia do k = 1, N_st energies(k) = lambda(k) + s2_out(k) = s2(k) enddo write_buffer = '=====' do i = 1, N_st diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 5a9f5e69..30b0f273 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -1,3 +1,33 @@ + +subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. + ! + ! Computes $s_0 = \langle l_0 | S^2 | r_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate + double precision, allocatable :: s_0(:,:), v_0(:,:) + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) + do_right = .True. + call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + do istate = 1, N_st + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + enddo +end + subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) use bitmasks implicit none diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 11a14b41..71dad8d6 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -35,6 +35,7 @@ end &BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] &BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC @@ -46,64 +47,153 @@ end logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) + double precision, parameter :: alpha = 0.1d0 + integer :: i_good_state,i_other_state, i_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + integer, allocatable :: iorder(:) PROVIDE N_det N_int if(n_det.le.N_det_max_full)then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) + allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) + if(s2_eig)then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo + endif + call non_hrmt_real_diag(N_det,H_prime,& leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& n_real_tc_bi_orth_eigval_right,eigval_right_tmp) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - integer, allocatable :: iorder(:) - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det - iorder(i) = i - coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - - if(igood_r.ne.igood_l.and.igood_r.ne.1)then - print *,'' - print *,'Warning, the left and right eigenvectors are "not the same" ' - print *,'Warning, the ground state is not dominated by HF...' - print *,'State with largest RIGHT coefficient of HF ',igood_r - print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) - print *,'State with largest LEFT coefficient of HF ',igood_l - print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then - print *,'Following the states with the largest coef on HF' - print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r - eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! print*,reigvec_tc_bi_orth(j,1) - enddo - i= igood_l - eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) - enddo - else - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) +! do i = 1, N_det +! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) +! enddo + call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + i_state = 0 + good_state_array = .False. + if(s2_eig)then + if (only_expected_s2) then + do j=1,N_det + ! Select at least n_states states with S^2 values closed to "expected_s2" +! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) + if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif enddo - enddo + else + do j=1,N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + if(i_state .ne.0)then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i=1,N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 + + else ! s2_eig + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det + iorder(i) = i + coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + + if(igood_r.ne.igood_l.and.igood_r.ne.1)then + print *,'' + print *,'Warning, the left and right eigenvectors are "not the same" ' + print *,'Warning, the ground state is not dominated by HF...' + print *,'State with largest RIGHT coefficient of HF ',igood_r + print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) + print *,'State with largest LEFT coefficient of HF ',igood_l + print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + endif + if(state_following_tc)then + print *,'Following the states with the largest coef on HF' + print *,'igood_r,igood_l',igood_r,igood_l + i= igood_r + eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) +! print*,reigvec_tc_bi_orth(j,1) + enddo + i= igood_l + eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) + enddo + else + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + endif endif else double precision, allocatable :: H_jj(:),vec_tmp(:,:) @@ -111,6 +201,8 @@ end external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) @@ -125,7 +217,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -140,7 +233,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -154,6 +248,7 @@ end norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,' = ',s2_eigvec_tc_bi_orth(1) END_PROVIDER diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index a5241fe3..4229fef1 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -84,12 +84,12 @@ end subroutine routine_test_s2_davidson implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) integer :: i,istate logical :: converged external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo @@ -105,8 +105,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -122,6 +121,8 @@ subroutine routine_test_s2_davidson accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 @@ -137,8 +138,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. @@ -151,6 +151,8 @@ subroutine routine_test_s2_davidson accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 From 16955745f496ad76be8a1c1b9d001f02df8d9a1c Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 10 Apr 2023 20:41:16 +0200 Subject: [PATCH 026/337] fixed conflict after TC S^2 merge --- external/qp2-dependencies | 2 +- .../diagonalization_hs2_dressed.irp.f | 3 +- src/determinants/h_apply.irp.f | 103 ++- src/kohn_sham/print_mos.irp.f | 30 + src/nuclei/write_pt_charges.py | 18 +- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 549 +++++++++++++ src/tc_bi_ortho/h_tc_s2_u0.irp.f | 769 ++++++++++++++++++ .../{u0_h_u0.irp.f => h_tc_u0.irp.f} | 3 - src/tc_bi_ortho/tc_bi_ortho.irp.f | 3 - src/tc_bi_ortho/tc_h_eigvectors.irp.f | 220 +++-- src/tc_bi_ortho/test_s2_tc.irp.f | 159 ++++ src/tc_scf/tc_scf.irp.f | 4 +- 12 files changed, 1754 insertions(+), 109 deletions(-) create mode 100644 src/kohn_sham/print_mos.irp.f create mode 100644 src/tc_bi_ortho/dav_h_tc_s2.irp.f create mode 100644 src/tc_bi_ortho/h_tc_s2_u0.irp.f rename src/tc_bi_ortho/{u0_h_u0.irp.f => h_tc_u0.irp.f} (99%) create mode 100644 src/tc_bi_ortho/test_s2_tc.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index f4c05595..e6c6cac7 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -461,7 +461,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) - y = h +! y = h + y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index d01ad1c7..078c2104 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -69,9 +69,15 @@ subroutine resize_H_apply_buffer(new_size,iproc) END_DOC PROVIDE H_apply_buffer_allocated + ASSERT (new_size > 0) ASSERT (iproc >= 0) ASSERT (iproc < nproc) + if (N_det < 0) call abort() !irp_here//': N_det < 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (new_size <= 0) call abort() !irp_here//': new_size <= 0') + if (iproc < 0) call abort() !irp_here//': iproc < 0') + if (iproc >= nproc) call abort() !irp_here//': iproc >= nproc') allocate ( buffer_det(N_int,2,new_size), & buffer_coef(new_size,N_states), & @@ -126,31 +132,34 @@ subroutine copy_H_apply_buffer_to_wf ASSERT (N_int > 0) - ASSERT (N_det > 0) + ASSERT (N_det >= 0) - allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) + N_det_old = N_det + if (N_det > 0) then + allocate ( buffer_det(N_int,2,N_det), buffer_coef(N_det,N_states) ) - ! Backup determinants - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j+=1 - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) - buffer_det(:,:,j) = psi_det(:,:,i) - enddo - N_det_old = j + ! Backup determinants + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j+=1 + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num) + buffer_det(:,:,j) = psi_det(:,:,i) + enddo + N_det_old = j - ! Backup coefficients - do k=1,N_states - j=0 - do i=1,N_det - if (pruned(i)) cycle ! Pruned determinants - j += 1 - buffer_coef(j,k) = psi_coef(i,k) - enddo - ASSERT ( j == N_det_old ) - enddo + ! Backup coefficients + do k=1,N_states + j=0 + do i=1,N_det + if (pruned(i)) cycle ! Pruned determinants + j += 1 + buffer_coef(j,k) = psi_coef(i,k) + enddo + ASSERT ( j == N_det_old ) + enddo + endif ! Update N_det N_det = N_det_old @@ -164,17 +173,19 @@ subroutine copy_H_apply_buffer_to_wf TOUCH psi_det_size endif - ! Restore backup in resized array - do i=1,N_det_old - psi_det(:,:,i) = buffer_det(:,:,i) - ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) - ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) - enddo - do k=1,N_states + if (N_det_old > 0) then + ! Restore backup in resized array do i=1,N_det_old - psi_coef(i,k) = buffer_coef(i,k) + psi_det(:,:,i) = buffer_det(:,:,i) + ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num) + ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num ) enddo - enddo + do k=1,N_states + do i=1,N_det_old + psi_coef(i,k) = buffer_coef(i,k) + enddo + enddo + endif ! Copy new buffers @@ -339,3 +350,33 @@ subroutine fill_H_apply_buffer_no_selection(n_selected,det_buffer,Nint,iproc) call omp_unset_lock(H_apply_buffer_lock(1,iproc)) end + +subroutine replace_wf(N_det_new, LDA, psi_coef_new, psi_det_new) + use omp_lib + implicit none + BEGIN_DOC +! Replaces the wave function. +! After calling this subroutine, N_det, psi_det and psi_coef need to be touched + END_DOC + integer, intent(in) :: N_det_new, LDA + double precision, intent(in) :: psi_coef_new(LDA,N_states) + integer(bit_kind), intent(in) :: psi_det_new(N_int,2,N_det_new) + + integer :: i,j + + PROVIDE H_apply_buffer_allocated + + if (N_det_new <= 0) call abort() !irp_here//': N_det_new <= 0') + if (N_int <= 0) call abort() !irp_here//': N_int <= 0') + if (LDA < N_det_new) call abort() !irp_here//': LDA < N_det_new') + + do j=0,nproc-1 + H_apply_buffer(j)%N_det = 0 + enddo + N_det = 0 + SOFT_TOUCH N_det + call fill_H_apply_buffer_no_selection(N_det_new,psi_det_new,N_int,0) + call copy_h_apply_buffer_to_wf + psi_coef(1:N_det_new,1:N_states) = psi_coef_new(1:N_det_new,1:N_states) + +end diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f new file mode 100644 index 00000000..5e728444 --- /dev/null +++ b/src/kohn_sham/print_mos.irp.f @@ -0,0 +1,30 @@ +program print_mos + implicit none + integer :: i,nx + double precision :: r(3), xmax, dx, accu + double precision, allocatable :: mos_array(:) + double precision:: alpha,envelop + allocate(mos_array(mo_num)) + xmax = 5.d0 + nx = 1000 + dx=xmax/dble(nx) + r = 0.d0 + alpha = 0.5d0 + do i = 1, nx + call give_all_mos_at_r(r,mos_array) + accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 + accu = dsqrt(accu) + envelop = (1.d0 - dexp(-alpha * r(3)**2)) + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + r(3) += dx + enddo + +end + +double precision function f_mu(x) + implicit none + double precision, intent(in) :: x + + + +end diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index 6dbcd5b8..f5007090 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -21,7 +21,7 @@ def mv_in_ezfio(ezfio,tmp): os.system(cmdmv) -# Getting the EZFIO + ##Getting the EZFIO EZFIO=sys.argv[1] EZFIO=EZFIO.replace("/", "") print(EZFIO) @@ -66,8 +66,20 @@ zip_in_ezfio(EZFIO,tmp) tmp="pts_charge_coord" fcoord = open(tmp,'w') fcoord.write(" 2\n") -fcoord.write(" "+str(n_charges)+' 3\n') -#fcoord.write(" "+' 3 '+str(n_charges)+' \n') +if(n_charges < 10): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000000): + fcoord.write(" "+str(n_charges)+' 3\n') for i in range(n_charges): fcoord.write(' '+coord_x[i]+'\n') for i in range(n_charges): diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f new file mode 100644 index 00000000..c0ea054a --- /dev/null +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -0,0 +1,549 @@ + +! --- + +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in + double precision, intent(in) :: H_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: s2_out(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(3,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), h_p(:,:), s2(:) + real, allocatable :: y_s(:,:) + double precision, allocatable :: s_(:,:), s_tmp(:,:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:), S_d(:,:) + + double precision, allocatable :: W(:,:) + real, pointer :: S(:,:) + + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda + + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.5d0*dble(sze*m)*(N_st_diag*itermax) &! W, S + + 4.5d0*(N_st_diag*itermax)**2 &! h,y,y_s,s_, s_tmp + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + ! --- + + + allocate( W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + S_d(sze,N_st_diag), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + h_p(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st), & + s2(N_st_diag*itermax), & + y_s(N_st_diag*itermax,N_st_diag*itermax) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U +! call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call hcalc(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) + else + + ! Already computed in update below + continue + endif + ! Compute s_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2) + do j=1,shift2 + do i=1,shift2 + s_(i,j) = 0.d0 + do k=1,sze + s_(i,j) = s_(i,j) + U(k,i) * dble(S(k,j)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + ! Penalty method + ! -------------- + + if (s2_eig) then + h_p = s_ + do k=1,shift2 + h_p(k,k) = h_p(k,k) - expected_s2 + enddo + if (only_expected_s2) then + alpha = 0.1d0 + h_p = h + alpha*h_p + else + alpha = 0.0001d0 + h_p = h + alpha*h_p + endif + else + h_p = h + alpha = 0.d0 + endif + + ! Diagonalize h y = lambda y + ! --------------------------- + call diag_nonsym_right(shift2, h_p(1,1), size(h_p, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + do k = 1, N_st_diag +! print*,'lambda(k) before = ',lambda(k) + lambda(k) = 0.d0 + do l = 1, shift2 + do m = 1, shift2 + lambda(k) += y(m,k) * h(m,l) * y(l,k) + enddo + enddo +! print*,'lambda(k) new = ',lambda(k) + enddo + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + enddo + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = s2(l) + to_print(3,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:3,1:N_st) + endif + + ! Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + s2_out(k) = s2(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f new file mode 100644 index 00000000..30b0f273 --- /dev/null +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -0,0 +1,769 @@ + +subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. + ! + ! Computes $s_0 = \langle l_0 | S^2 | r_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate + double precision, allocatable :: s_0(:,:), v_0(:,:) + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) + do_right = .True. + call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + do istate = 1, N_st + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + enddo +end + +subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + +subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + + +subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical, intent(in) :: do_right + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1, do_right) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_tc_s2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (2) + call H_tc_s2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (3) + call H_tc_s2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (4) + call H_tc_s2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case default + call H_tc_s2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + end select +end +BEGIN_TEMPLATE + +subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! ------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev,hmono, htwoe, hthree, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep ! Loop over all determinants (/!\ not in psidet order) + + krow = psi_bilinear_matrix_rows(k_a) ! Index of alpha part of determinant k_a + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) ! Index of beta part of determinant k_a + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! -> Here, tmp_det is determinant k_a + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + + ! tmp_det2 is a single excitation of tmp_det in the beta spin + ! the alpha part is not defined yet + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + ! rows : | 1 2 3 4 | 1 3 4 6 | .... | 1 2 4 5 | + ! cols : | 1 1 1 1 | 2 2 2 2 | .... | 8 8 8 8 | + ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 | + ! ^ ^ + ! | | + ! l_a N_det + ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol + + ! Below we identify all the determinants with the same beta part + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + ! Get all single excitations from tmp_det(1,1) to buffer(1,?) + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! ---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) +! call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem + double precision :: hmono, htwoe, hthree + +! hij = diag_H_mat_elem(tmp_det,$N_int) + call diag_htilde_mu_mat_fock_bi_ortho ($N_int, tmp_det, hmono, htwoe, hthree, hij) + call get_s2(tmp_det,tmp_det,$N_int,sij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/h_tc_u0.irp.f similarity index 99% rename from src/tc_bi_ortho/u0_h_u0.irp.f rename to src/tc_bi_ortho/h_tc_u0.irp.f index e107ad88..5e6150ea 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_u0.irp.f @@ -93,9 +93,6 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) -! provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e -! provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell -! provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 98b83329..9109edc4 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -7,9 +7,6 @@ program tc_bi_ortho ! END_DOC - implicit none - - print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index fa9d9f5c..07b5a6e2 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -25,8 +25,6 @@ subroutine diagonalize_CI_tc psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo -! psi_energy(1:N_states) = CI_electronic_energy(1:N_states) -! psi_s2(1:N_states) = CI_s2(1:N_states) SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho end @@ -37,6 +35,7 @@ end &BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] &BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC @@ -47,76 +46,163 @@ end integer :: i, idx_dress, j, istate, k logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l - integer, allocatable :: iorder(:) - double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:), leigvec_tc_bi_orth_tmp(:,:), eigval_right_tmp(:) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:), Stmp(:,:) + double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) + double precision, parameter :: alpha = 0.1d0 + integer :: i_good_state,i_other_state, i_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + double precision, allocatable :: Stmp(:,:) + integer, allocatable :: iorder(:) PROVIDE N_det N_int - if(n_det .le. N_det_max_full) then - - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det), leigvec_tc_bi_orth_tmp(N_det,N_det), eigval_right_tmp(N_det)) - - call non_hrmt_real_diag( N_det, htilde_matrix_elmt_bi_ortho & - , leigvec_tc_bi_orth_tmp, reigvec_tc_bi_orth_tmp, n_real_tc_bi_orth_eigval_right, eigval_right_tmp) - - allocate(coef_hf_r(N_det), coef_hf_l(N_det), iorder(N_det)) - do i = 1, N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r, iorder, N_det) - igood_r = iorder(1) - print*, 'igood_r, coef_hf_r = ', igood_r, coef_hf_r(1) - do i = 1, N_det - iorder(i) = i - coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l, iorder, N_det) - igood_l = iorder(1) - print*, 'igood_l, coef_hf_l = ', igood_l, coef_hf_l(1) - - if(igood_r .ne. igood_l .and. igood_r .ne. 1)then - print *,'' - print *,'Warning, the left and right eigenvectors are "not the same" ' - print *,'Warning, the ground state is not dominated by HF...' - print *,'State with largest RIGHT coefficient of HF ',igood_r - print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) - print *,'State with largest LEFT coefficient of HF ',igood_l - print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + if(n_det.le.N_det_max_full)then + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) + allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) + if(s2_eig)then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo endif + call non_hrmt_real_diag(N_det,H_prime,& + leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& + n_real_tc_bi_orth_eigval_right,eigval_right_tmp) +! do i = 1, N_det +! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) +! enddo + call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + i_state = 0 + good_state_array = .False. + if(s2_eig)then + if (only_expected_s2) then + do j=1,N_det + ! Select at least n_states states with S^2 values closed to "expected_s2" +! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) + if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif + enddo + else + do j=1,N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + if(i_state .ne.0)then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i=1,N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 - if(state_following_tc) then - - print *,'Following the states with the largest coef on HF' - print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r - eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) - enddo - i= igood_l - eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) - enddo - - else - - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + else ! s2_eig + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det + iorder(i) = i + coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + + if(igood_r.ne.igood_l.and.igood_r.ne.1)then + print *,'' + print *,'Warning, the left and right eigenvectors are "not the same" ' + print *,'Warning, the ground state is not dominated by HF...' + print *,'State with largest RIGHT coefficient of HF ',igood_r + print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) + print *,'State with largest LEFT coefficient of HF ',igood_l + print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + endif + if(state_following_tc)then + print *,'Following the states with the largest coef on HF' + print *,'igood_r,igood_l',igood_r,igood_l + i= igood_r + eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) +! print*,reigvec_tc_bi_orth(j,1) enddo - enddo + i= igood_l + eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) + enddo + + else + + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + endif ! check bi-orthogonality allocate(Stmp(N_states,N_states)) call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) & - , 0.d0, Stmp, size(Stmp, 1) ) + , 0.d0, Stmp(1,1), size(Stmp, 1) ) print *, ' overlap matrix between states:' do i = 1, N_states write(*,'(1000(F16.10,X))') Stmp(i,:) @@ -132,6 +218,8 @@ end external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) @@ -146,7 +234,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -161,7 +250,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -176,7 +266,9 @@ end do j = 1, N_det norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,i) * reigvec_tc_bi_orth(j,i) enddo - print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,' state ', i + print*,' norm l/r = ', norm_ground_left_right_bi_orth + print*,' = ', s2_eigvec_tc_bi_orth(i) enddo ! --- @@ -200,8 +292,6 @@ end deallocate(buffer) - ! --- - END_PROVIDER diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f new file mode 100644 index 00000000..4229fef1 --- /dev/null +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -0,0 +1,159 @@ +program test_tc + implicit none + read_wf = .True. + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call routine_test_s2 + call routine_test_s2_davidson +end + +subroutine routine_test_s2 + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking first the Left ' + do_right = .False. + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + print*,'Checking then the right ' + do_right = .True. + do i = 1, sze + u_0(i,1) = psi_r_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + +end + +subroutine routine_test_s2_davidson + implicit none + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) + integer :: i,istate + logical :: converged + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) + do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + enddo + ! Preparing the left-eigenvector + print*,'Computing the left-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + integer :: sze,N_st + logical :: do_right + sze = N_det + N_st = 1 + do_right = .False. + allocate(s_0_new(N_det,1),v_0_new(N_det,1)) + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + double precision :: accu_e_0, accu_s_0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'energies = ',energies + print*,'s2 = ',s2 + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + + ! Preparing the right-eigenvector + print*,'Computing the right-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + sze = N_det + N_st = 1 + do_right = .True. + v_0_new = 0.d0 + s_0_new = 0.d0 + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'energies = ',energies + print*,'s2 = ',s2 + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + +end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index ae3b609b..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 60 - my_n_pt_a_grid = 110 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid From 6be57e3c01f1ba71d22495fcfaed52448792163e Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 027/337] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING MULTIPLE TIME THE ROUTINE + ! + ! SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! --- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. From 5cfff229a17859c97aeb15363bad58946852cdf7 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 028/337] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING MULTIPLE TIME THE ROUTINE + ! + ! SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! --- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. From e3aadcf06cfe92744d15688ee9ba5aaa0a6d2e53 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 22:44:14 +0200 Subject: [PATCH 029/337] last update on davidson S2 --- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 91775cf1..69302da2 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -222,11 +222,14 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - integer :: n_it_max + integer :: n_it_max,i_it n_it_max = 1 converged = .False. + i_it = 0 do while (.not.converged) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5)exit enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) @@ -248,8 +251,11 @@ end ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. + i_it = 0 do while (.not.converged) call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5)exit enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) From b1df3d6d037efe59a3e9272cbf5b7795941e9969 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 13 Apr 2023 13:03:10 +0200 Subject: [PATCH 030/337] save l/r coef after diag --- src/tc_bi_ortho/psi_r_l_prov.irp.f | 61 +++--- src/tc_bi_ortho/tc_bi_ortho.irp.f | 22 +- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 282 ++++++++++++-------------- src/tc_bi_ortho/tc_utils.irp.f | 2 + 4 files changed, 186 insertions(+), 181 deletions(-) diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index 05d132d5..ee8abcec 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -192,44 +192,39 @@ subroutine save_tc_wavefunction_general(ndet, nstates, psidet, sze, dim_psicoef, endif end -subroutine save_tc_bi_ortho_wavefunction - implicit none - if(save_sorted_tc_wf)then - call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, N_det & - , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho & - , psi_r_coef_sorted_bi_ortho ) - !call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, size(psi_det_sorted_tc, 3) & - ! , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho & - ! , psi_r_coef_sorted_bi_ortho ) - else - call save_tc_wavefunction_general( N_det, N_states, psi_det, size(psi_det, 3) & - , size(psi_l_coef_bi_ortho, 1), psi_l_coef_bi_ortho & - , psi_r_coef_bi_ortho ) - endif - call routine_save_right_bi_ortho +subroutine save_tc_bi_ortho_wavefunction() + implicit none + if(save_sorted_tc_wf)then + call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, size(psi_det_sorted_tc, 3) & + , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho, psi_r_coef_sorted_bi_ortho) + else + call save_tc_wavefunction_general( N_det, N_states, psi_det, size(psi_det, 3) & + , size(psi_l_coef_bi_ortho, 1), psi_l_coef_bi_ortho, psi_r_coef_bi_ortho ) + endif + call routine_save_right_bi_ortho() end subroutine routine_save_right_bi_ortho - implicit none - double precision, allocatable :: coef_tmp(:,:) - integer :: i - allocate(coef_tmp(N_det, N_states)) - do i = 1, N_det - coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) - enddo - call save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1)) + implicit none + double precision, allocatable :: coef_tmp(:,:) + integer :: i + allocate(coef_tmp(N_det, N_states)) + do i = 1, N_det + coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) + enddo + call save_wavefunction_general_unormalized(N_det, N_states, psi_det_sorted_tc, size(coef_tmp, 1), coef_tmp(1,1)) end subroutine routine_save_left_right_bi_ortho - implicit none - double precision, allocatable :: coef_tmp(:,:) - integer :: i,n_states_tmp - n_states_tmp = 2 - allocate(coef_tmp(N_det, n_states_tmp)) - do i = 1, N_det - coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1) - coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1) - enddo - call save_wavefunction_general_unormalized(N_det,n_states_tmp,psi_det,size(coef_tmp,1),coef_tmp(1,1)) + implicit none + double precision, allocatable :: coef_tmp(:,:) + integer :: i,n_states_tmp + n_states_tmp = 2 + allocate(coef_tmp(N_det, n_states_tmp)) + do i = 1, N_det + coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1) + coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1) + enddo + call save_wavefunction_general_unormalized(N_det, n_states_tmp, psi_det, size(coef_tmp, 1), coef_tmp(1,1)) end diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 9109edc4..2a1919f4 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -39,7 +39,7 @@ end subroutine routine_diag() implicit none - integer :: i, j + integer :: i, j, k double precision :: dE ! provide eigval_right_tc_bi_orth @@ -82,6 +82,26 @@ subroutine routine_diag() endif + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + do k = 1, N_states + do i = 1, N_det + psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_l_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + do k = 1, N_states + do i = 1, N_det + psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_r_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + deallocate(buffer) + end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 3b346efe..cc236359 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -58,111 +58,117 @@ end PROVIDE N_det N_int - if(n_det.le.N_det_max_full)then + if(n_det .le. N_det_max_full) then + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) - if(s2_eig)then - H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) - do j=1,N_det - H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 - enddo + if(s2_eig) then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo endif - call non_hrmt_real_diag(N_det,H_prime,& - leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& - n_real_tc_bi_orth_eigval_right,eigval_right_tmp) + + call non_hrmt_real_diag(N_det, H_prime, leigvec_tc_bi_orth_tmp, reigvec_tc_bi_orth_tmp, n_real_tc_bi_orth_eigval_right, eigval_right_tmp) ! do i = 1, N_det ! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) ! enddo call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) i_state = 0 good_state_array = .False. - if(s2_eig)then - if (only_expected_s2) then - do j=1,N_det + + if(s2_eig) then + + if(only_expected_s2) then + do j = 1, N_det ! Select at least n_states states with S^2 values closed to "expected_s2" ! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) - if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then - i_state +=1 - index_good_state_array(i_state) = j - good_state_array(j) = .True. - endif - if(i_state.eq.N_states) then - exit - endif - enddo - else - do j=1,N_det - index_good_state_array(j) = j - good_state_array(j) = .True. - enddo - endif - if(i_state .ne.0)then - ! Fill the first "i_state" states that have a correct S^2 value - do j = 1, i_state - do i=1,N_det - reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) - leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) - enddo - eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) - eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) - s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) - enddo - i_other_state = 0 - do j = 1, N_det - if(good_state_array(j))cycle - i_other_state +=1 - if(i_state+i_other_state.gt.n_states)then - exit - endif - do i=1,N_det - reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) - leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) - enddo - eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) - eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) - s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) - enddo - else ! istate == 0 - print*,'' - print*,'!!!!!!!! WARNING !!!!!!!!!' - print*,' Within the ',N_det,'determinants selected' - print*,' and the ',N_states_diag,'states requested' - print*,' We did not find only states with S^2 values close to ',expected_s2 - print*,' We will then set the first N_states eigenvectors of the H matrix' - print*,' as the CI_eigenvectors' - print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' - print*,'' - do j=1,min(N_states_diag,N_det) - do i=1,N_det - leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) - reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) - enddo - eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) - eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) - s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) - enddo - endif ! istate .ne. 0 + if(dabs(s2_values_tmp(j) - expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif + enddo + else + do j = 1, N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + + if(i_state .ne. 0) then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i = 1, N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i = 1, N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j = 1, min(N_states_diag, N_det) + do i = 1, N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 else ! s2_eig - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det + + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det iorder(i) = i coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det iorder(i) = i coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - if(igood_r.ne.igood_l.and.igood_r.ne.1)then + if(igood_r.ne.igood_l .and. igood_r.ne.1) then print *,'' print *,'Warning, the left and right eigenvectors are "not the same" ' print *,'Warning, the ground state is not dominated by HF...' @@ -170,22 +176,22 @@ end print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) print *,'State with largest LEFT coefficient of HF ',igood_l print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then + endif + + if(state_following_tc) then print *,'Following the states with the largest coef on HF' print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r + i = igood_r eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! print*,reigvec_tc_bi_orth(j,1) enddo - i= igood_l + i = igood_l eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) enddo - else + else do i = 1, N_states eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) @@ -194,22 +200,11 @@ end leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) enddo enddo - endif - - ! check bi-orthogonality - allocate(Stmp(N_states,N_states)) - call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & - , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) & - , 0.d0, Stmp(1,1), size(Stmp, 1) ) - print *, ' overlap matrix between states:' - do i = 1, N_states - write(*,'(1000(F16.10,X))') Stmp(i,:) - enddo - deallocate(Stmp) + endif endif - else + else ! n_det > N_det_max_full double precision, allocatable :: H_jj(:),vec_tmp(:,:) external htc_bi_ortho_calc_tdav @@ -218,36 +213,39 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) + do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo - !!!! Preparing the left-eigenvector + print*,'---------------------------------' print*,'---------------------------------' print*,'Computing the left-eigenvector ' print*,'---------------------------------' print*,'---------------------------------' + !!!! Preparing the left-eigenvector vec_tmp = 0.d0 do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) enddo do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 + vec_tmp(istate,istate) = 1.d0 enddo -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) + !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) integer :: n_it_max,i_it n_it_max = 1 converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - i_it += 1 - if(i_it .gt. 5)exit + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5) exit enddo do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo print*,'---------------------------------' @@ -255,32 +253,43 @@ end print*,'Computing the right-eigenvector ' print*,'---------------------------------' print*,'---------------------------------' - !!!! Preparing the right-eigenvector + !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) enddo do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 + vec_tmp(istate,istate) = 1.d0 enddo -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 - do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - i_it += 1 - if(i_it .gt. 5)exit + do while (.not. converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5) exit enddo do istate = 1, N_states - reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo deallocate(H_jj) - endif + endif - call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) - print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) + call bi_normalize(leigvec_tc_bi_orth, reigvec_tc_bi_orth, size(reigvec_tc_bi_orth, 1), N_det, N_states) + ! check bi-orthogonality + allocate(Stmp(N_states,N_states)) + call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & + , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) & + , 0.d0, Stmp(1,1), size(Stmp, 1) ) + print *, ' overlap matrix between states:' + do i = 1, N_states + write(*,'(1000(F16.10,X))') Stmp(i,:) + enddo + deallocate(Stmp) + + print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ', leigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(1,1) do i = 1, N_states norm_ground_left_right_bi_orth = 0.d0 do j = 1, N_det @@ -291,27 +300,6 @@ end print*,' = ', s2_eigvec_tc_bi_orth(i) enddo - ! --- - - double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) - - do k = 1, N_states - do i = 1, N_det - buffer(i,k) = leigvec_tc_bi_orth(i,k) - enddo - enddo - call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) - - do k = 1, N_states - do i = 1, N_det - buffer(i,k) = reigvec_tc_bi_orth(i,k) - enddo - enddo - call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) - - deallocate(buffer) - END_PROVIDER diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 92e8639d..594b466c 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -14,11 +14,13 @@ subroutine write_tc_energy() !htot = htilde_matrix_elmt_bi_ortho(i,j) call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot + !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot enddo enddo O_TC = 0.d0 do i = 1, N_det + !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) enddo From 948f8399529dba5ad07a94fc4a1be4659cdfd2be Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 13 Apr 2023 13:22:04 +0200 Subject: [PATCH 031/337] print correctly after diag --- src/tc_bi_ortho/tc_bi_ortho.irp.f | 20 -------------------- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2a1919f4..b69a2fe6 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -82,26 +82,6 @@ subroutine routine_diag() endif - double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) - do k = 1, N_states - do i = 1, N_det - psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) - buffer(i,k) = leigvec_tc_bi_orth(i,k) - enddo - enddo - TOUCH psi_l_coef_bi_ortho - call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) - do k = 1, N_states - do i = 1, N_det - psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) - buffer(i,k) = reigvec_tc_bi_orth(i,k) - enddo - enddo - TOUCH psi_r_coef_bi_ortho - call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) - deallocate(buffer) - end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index cc236359..3d452b68 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -300,6 +300,26 @@ end print*,' = ', s2_eigvec_tc_bi_orth(i) enddo + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + do k = 1, N_states + do i = 1, N_det + psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_l_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + do k = 1, N_states + do i = 1, N_det + psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_r_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + deallocate(buffer) + END_PROVIDER From 4c0de615fb0a572212c2c3940c601f64e8cd0164 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 13 Apr 2023 19:36:39 +0200 Subject: [PATCH 032/337] Fix qp_extract_cipsi_data.py --- scripts/qp_extract_cipsi_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 8f0b1f3c..dd8e9c4d 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -23,6 +23,9 @@ def extract_data(output): reading = False for iline, line in enumerate(lines): + if line.startswith("Summary at N_det"): + reading = False + if not reading and line.startswith(" N_det "): n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) reading = True From ef3471737869cbc9896f2cb586f9e7708c6a3af4 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 14 Apr 2023 02:12:53 +0200 Subject: [PATCH 033/337] removed s2 penality --- src/davidson/diagonalization_hs2_dressed.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index c20d66d7..5c3a495b 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -462,8 +462,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ double precision, allocatable :: work(:) -! y = h - y = h_p + y = h + !y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From 3d1c30700889a34094a345037876a09f00e1ebda Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 14 Apr 2023 10:56:07 +0200 Subject: [PATCH 034/337] canged h_p to h --- src/davidson/diagonalization_hs2_dressed.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index bbbab2d4..dc42b9a8 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -466,8 +466,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From f475446d9dfb9294cb916053b8a6a0d1f52149f7 Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 14 Apr 2023 16:35:06 +0200 Subject: [PATCH 035/337] Improved qp command --- bin/qp_test | 3 ++- etc/qp.rc | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bin/qp_test b/bin/qp_test index 288b7291..d3a188fb 100755 --- a/bin/qp_test +++ b/bin/qp_test @@ -46,7 +46,7 @@ def main(arguments): append_bats(dirname, filenames) else: for (dirname, _, filenames) in os.walk(os.getcwd(), followlinks=False): - if "IRPF90_temp" not in dirname: + if "IRPF90_temp" not in dirname and "external" not in dirname: append_bats(dirname, filenames) l_bats = [y for _, y in sorted(l_bats)] @@ -67,6 +67,7 @@ def main(arguments): os.system(test+" python3 bats_to_sh.py "+bats_file+ "| bash") else: +# print(" ".join(["bats", "--verbose-run", "--trace", bats_file])) subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ) diff --git a/etc/qp.rc b/etc/qp.rc index c56661c7..d339f475 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -110,6 +110,11 @@ function qp() unset COMMAND ;; + "test") + shift + qp_test $@ + ;; + *) which "qp_$1" &> /dev/null if [[ $? -eq 0 ]] ; then @@ -183,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in * ; do [[ -f ${i}/ezfio/.version ]] && echo $i ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) @@ -215,10 +220,15 @@ _qp_Complete() return 0 ;; esac;; + test) + COMPREPLY=( $(compgen -W "-v -a " -- $cur ) ) + return 0 + ;; *) COMPREPLY=( $(compgen -W 'plugins set_file \ unset_file man \ create_ezfio \ + test \ convert_output_to_ezfio \ -h update' -- $cur ) ) From e4664975e14fa901ad6e0fa8b009b0ba80912951 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 15 Apr 2023 00:59:22 +0200 Subject: [PATCH 036/337] fixed bug in tc_bi_ortho --- src/tc_bi_ortho/psi_r_l_prov.irp.f | 53 ++++++++++-- src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f | 15 ---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 80 +++++++++++-------- 3 files changed, 92 insertions(+), 56 deletions(-) delete mode 100644 src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index ee8abcec..b28c417f 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -192,33 +192,51 @@ subroutine save_tc_wavefunction_general(ndet, nstates, psidet, sze, dim_psicoef, endif end +! --- + subroutine save_tc_bi_ortho_wavefunction() + implicit none - if(save_sorted_tc_wf)then + + if(save_sorted_tc_wf) then + call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, size(psi_det_sorted_tc, 3) & , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho, psi_r_coef_sorted_bi_ortho) + call routine_save_right_sorted_bi_ortho() + else + call save_tc_wavefunction_general( N_det, N_states, psi_det, size(psi_det, 3) & , size(psi_l_coef_bi_ortho, 1), psi_l_coef_bi_ortho, psi_r_coef_bi_ortho ) + call routine_save_right_bi_ortho() + endif - call routine_save_right_bi_ortho() + end -subroutine routine_save_right_bi_ortho +! --- + +subroutine routine_save_right_sorted_bi_ortho() + implicit none + integer :: i double precision, allocatable :: coef_tmp(:,:) - integer :: i + allocate(coef_tmp(N_det, N_states)) do i = 1, N_det coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) enddo call save_wavefunction_general_unormalized(N_det, N_states, psi_det_sorted_tc, size(coef_tmp, 1), coef_tmp(1,1)) -end + deallocate(coef_tmp) + +end + +subroutine routine_save_left_right_sorted_bi_ortho() -subroutine routine_save_left_right_bi_ortho implicit none + integer :: i, n_states_tmp double precision, allocatable :: coef_tmp(:,:) - integer :: i,n_states_tmp + n_states_tmp = 2 allocate(coef_tmp(N_det, n_states_tmp)) do i = 1, N_det @@ -226,5 +244,26 @@ subroutine routine_save_left_right_bi_ortho coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1) enddo call save_wavefunction_general_unormalized(N_det, n_states_tmp, psi_det, size(coef_tmp, 1), coef_tmp(1,1)) + deallocate(coef_tmp) end +! --- + +subroutine routine_save_right_bi_ortho() + + implicit none + integer :: i + double precision, allocatable :: coef_tmp(:,:) + + allocate(coef_tmp(N_det, N_states)) + do i = 1, N_det + coef_tmp(i,1:N_states) = psi_r_coef_bi_ortho(i,1:N_states) + enddo + call save_wavefunction_general_unormalized(N_det, N_states, psi_det, size(coef_tmp, 1), coef_tmp(1,1)) + deallocate(coef_tmp) + +end + +! --- + + diff --git a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f b/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f deleted file mode 100644 index 5eb3c069..00000000 --- a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f +++ /dev/null @@ -1,15 +0,0 @@ -program tc_bi_ortho - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 - read_wf = .True. - touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine_save_left_right_bi_ortho -! call test -end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 3d452b68..a288810b 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -266,7 +266,7 @@ end converged = .False. i_it = 0 do while (.not. converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) i_it += 1 if(i_it .gt. 5) exit enddo @@ -324,42 +324,54 @@ END_PROVIDER -subroutine bi_normalize(u_l,u_r,n,ld,nstates) +subroutine bi_normalize(u_l, u_r, n, ld, nstates) + + BEGIN_DOC !!!! Normalization of the scalar product of the left/right eigenvectors + END_DOC + + implicit none + integer, intent(in) :: n, ld, nstates double precision, intent(inout) :: u_l(ld,nstates), u_r(ld,nstates) - integer, intent(in) :: n,ld,nstates - integer :: i - double precision :: accu, tmp + integer :: i, j + double precision :: accu, tmp + do i = 1, nstates - !!!! Normalization of right eigenvectors |Phi> - accu = 0.d0 - do j = 1, n - accu += u_r(j,i) * u_r(j,i) - enddo - accu = 1.d0/dsqrt(accu) - print*,'accu_r = ',accu - do j = 1, n - u_r(j,i) *= accu - enddo - tmp = u_r(1,i) / dabs(u_r(1,i)) - do j = 1, n - u_r(j,i) *= tmp - enddo - !!!! Adaptation of the norm of the left eigenvector such that = 1 - accu = 0.d0 - do j = 1, n - accu += u_l(j,i) * u_r(j,i) -! print*,j, u_l(j,i) , u_r(j,i) - enddo - if(accu.gt.0.d0)then + + !!!! Normalization of right eigenvectors |Phi> + accu = 0.d0 + do j = 1, n + accu += u_r(j,i) * u_r(j,i) + enddo accu = 1.d0/dsqrt(accu) - else - accu = 1.d0/dsqrt(-accu) - endif - tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i)) - do j = 1, n - u_l(j,i) *= accu * tmp - u_r(j,i) *= accu - enddo + print*,'accu_r = ',accu + do j = 1, n + u_r(j,i) *= accu + enddo + tmp = u_r(1,i) / dabs(u_r(1,i)) + do j = 1, n + u_r(j,i) *= tmp + enddo + + !!!! Adaptation of the norm of the left eigenvector such that = 1 + accu = 0.d0 + do j = 1, n + accu += u_l(j,i) * u_r(j,i) + !print*,j, u_l(j,i) , u_r(j,i) + enddo + print*,'accu_lr = ', accu + if(accu.gt.0.d0)then + accu = 1.d0/dsqrt(accu) + else + accu = 1.d0/dsqrt(-accu) + endif + tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i)) + do j = 1, n + u_l(j,i) *= accu * tmp + u_r(j,i) *= accu + enddo + enddo + end + From cc93d54f98448162883b3eae6f87f121f11aec60 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 16 Apr 2023 03:43:28 +0200 Subject: [PATCH 037/337] extract in better format --- scripts/qp_extract_cipsi_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 8f0b1f3c..faffdda2 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -41,7 +41,8 @@ def extract_data(output): e_ex = float(e_ex_line.split()[1]) reading = False - data.append((n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex)) + new_data = " {:8d} {:16.8f} {:e} {:e} {:e} {:e} {:16.8f}".format(n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex) + data.append(new_data) n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None return data @@ -49,6 +50,6 @@ def extract_data(output): data = extract_data(output) for item in data: - print(" ".join(str(x) for x in item)) + print(item) From 16a17f021a837a08f6c45eacfd92dc210af7275a Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 16 Apr 2023 19:47:00 +0200 Subject: [PATCH 038/337] fixed inout problem --- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 100 ++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 36 deletions(-) diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 30b0f273..55b4da5e 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -1,7 +1,6 @@ -subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) - use bitmasks - implicit none +subroutine get_H_tc_s2_l0_r0(l_0, r_0, N_st, sze, energies, s2) + BEGIN_DOC ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. ! @@ -11,26 +10,34 @@ subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) - double precision, intent(out) :: energies(N_st), s2(N_st) - logical :: do_right - integer :: istate + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate double precision, allocatable :: s_0(:,:), v_0(:,:) - double precision :: u_dot_v, norm + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) do_right = .True. - call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + call H_tc_s2_u_0_opt(v_0, s_0, r_0, N_st, sze) + do istate = 1, N_st - norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) - energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm - s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm enddo + end -subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) - use bitmasks - implicit none +! --- + +subroutine H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -38,16 +45,24 @@ subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. - call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + call H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + end -subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) - use bitmasks - implicit none +! --- + +subroutine H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -55,17 +70,23 @@ subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right - do_right = .False. - call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) -end - -subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) use bitmasks implicit none + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + +end + +! --- + +subroutine H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -75,12 +96,18 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) ! ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical, intent(in) :: do_right - integer :: k - double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + logical, intent(in) :: do_right + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) @@ -119,6 +146,7 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end +! --- subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) use bitmasks From 79c9d91d1991b7f2561e089f00e808c5c8ceb881 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:20:36 +0200 Subject: [PATCH 039/337] missing script ccsd --- src/ccsd/org/TANGLE_org_mode.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 src/ccsd/org/TANGLE_org_mode.sh diff --git a/src/ccsd/org/TANGLE_org_mode.sh b/src/ccsd/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/ccsd/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done From 0325e59ebef7fa8c0c6757d126215ee28db3021d Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 11:22:04 +0200 Subject: [PATCH 040/337] remove old utils_trust_region --- src/utils_trust_region/EZFIO.cfg | 89 - src/utils_trust_region/NEED | 1 - src/utils_trust_region/README.rst | 5 - src/utils_trust_region/TANGLE_org_mode.sh | 7 - src/utils_trust_region/algo_trust.irp.f | 248 --- src/utils_trust_region/algo_trust.org | 593 ------ .../apply_mo_rotation.irp.f | 85 - src/utils_trust_region/apply_mo_rotation.org | 86 - src/utils_trust_region/mat_to_vec_index.irp.f | 61 - src/utils_trust_region/mat_to_vec_index.org | 63 - src/utils_trust_region/pi.h | 2 - src/utils_trust_region/rotation_matrix.irp.f | 443 ----- src/utils_trust_region/rotation_matrix.org | 454 ----- .../sub_to_full_rotation_matrix.irp.f | 64 - .../sub_to_full_rotation_matrix.org | 65 - .../trust_region_expected_e.irp.f | 119 -- .../trust_region_expected_e.org | 121 -- .../trust_region_optimal_lambda.irp.f | 1655 ---------------- .../trust_region_optimal_lambda.org | 1665 ----------------- src/utils_trust_region/trust_region_rho.irp.f | 121 -- src/utils_trust_region/trust_region_rho.org | 123 -- .../trust_region_step.irp.f | 716 ------- src/utils_trust_region/trust_region_step.org | 726 ------- src/utils_trust_region/vec_to_mat_index.irp.f | 71 - src/utils_trust_region/vec_to_mat_index.org | 72 - src/utils_trust_region/vec_to_mat_v2.irp.f | 39 - src/utils_trust_region/vec_to_mat_v2.org | 40 - 27 files changed, 7734 deletions(-) delete mode 100644 src/utils_trust_region/EZFIO.cfg delete mode 100644 src/utils_trust_region/NEED delete mode 100644 src/utils_trust_region/README.rst delete mode 100755 src/utils_trust_region/TANGLE_org_mode.sh delete mode 100644 src/utils_trust_region/algo_trust.irp.f delete mode 100644 src/utils_trust_region/algo_trust.org delete mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f delete mode 100644 src/utils_trust_region/apply_mo_rotation.org delete mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f delete mode 100644 src/utils_trust_region/mat_to_vec_index.org delete mode 100644 src/utils_trust_region/pi.h delete mode 100644 src/utils_trust_region/rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/rotation_matrix.org delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f delete mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.org delete mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f delete mode 100644 src/utils_trust_region/trust_region_expected_e.org delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f delete mode 100644 src/utils_trust_region/trust_region_optimal_lambda.org delete mode 100644 src/utils_trust_region/trust_region_rho.irp.f delete mode 100644 src/utils_trust_region/trust_region_rho.org delete mode 100644 src/utils_trust_region/trust_region_step.irp.f delete mode 100644 src/utils_trust_region/trust_region_step.org delete mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_index.org delete mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f delete mode 100644 src/utils_trust_region/vec_to_mat_v2.org diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg deleted file mode 100644 index 9c9f6248..00000000 --- a/src/utils_trust_region/EZFIO.cfg +++ /dev/null @@ -1,89 +0,0 @@ -[thresh_delta] -type: double precision -doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta -interface: ezfio,provider,ocaml -default: 1.e-10 - -[thresh_rho] -type: double precision -doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_eig] -type: double precision -doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[thresh_model] -type: double precision -doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm -interface: ezfio,provider,ocaml -default: 1.e-12 - -[absolute_eig] -type: logical -doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) -interface: ezfio,provider,ocaml -default: false - -[thresh_wtg] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_wtg2] -type: double precision -doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day -interface: ezfio,provider,ocaml -default: 1.e-6 - -[avoid_saddle] -type: logical -doc: Test to avoid saddle point, active if true -interface: ezfio,provider,ocaml -default: false - -[version_avoid_saddle] -type: integer -doc: cf. trust region, not stable -interface: ezfio,provider,ocaml -default: 3 - -[thresh_rho_2] -type: double precision -doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected -interface: ezfio,provider,ocaml -default: 0.1 - -[thresh_cc] -type: double precision -doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) -interface: ezfio,provider,ocaml -default: 1.e-6 - -[thresh_model_2] -type: double precision -doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning -interface: ezfio,provider,ocaml -default: 1.e-12 - -[version_lambda_search] -type: integer -doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 -interface: ezfio,provider,ocaml -default: 2 - -[nb_it_max_lambda] -type: integer -doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 100 - -[nb_it_max_pre_search] -type: integer -doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm -interface: ezfio,provider,ocaml -default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED deleted file mode 100644 index 1a65ce38..00000000 --- a/src/utils_trust_region/NEED +++ /dev/null @@ -1 +0,0 @@ -hartree_fock diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst deleted file mode 100644 index 6a0689b6..00000000 --- a/src/utils_trust_region/README.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -trust_region -============ - -The documentation can be found in the org files. diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/utils_trust_region/TANGLE_org_mode.sh deleted file mode 100755 index 059cbe7d..00000000 --- a/src/utils_trust_region/TANGLE_org_mode.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -list='ls *.org' -for element in $list -do - emacs --batch $element -f org-babel-tangle -done diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f deleted file mode 100644 index eac17275..00000000 --- a/src/utils_trust_region/algo_trust.irp.f +++ /dev/null @@ -1,248 +0,0 @@ -! Algorithm for the trust region - -! step_in_trust_region: -! Computes the step in the trust region (delta) -! (automatically sets at the iteration 0 and which evolves during the -! process in function of the evolution of rho). The step is computing by -! constraining its norm with a lagrange multiplier. -! Since the calculation of the step is based on the Newton method, an -! estimation of the gain in energy is given using the Taylors series -! truncated at the second order (criterion_model). -! If (DABS(criterion-criterion_model) < 1d-12) then -! must_exit = .True. -! else -! must_exit = .False. - -! This estimation of the gain in energy is used by -! is_step_cancel_trust_region to say if the step is accepted or cancelled. - -! If the step must be cancelled, the calculation restart from the same -! hessian and gradient and recomputes the step but in a smaller trust -! region and so on until the step is accepted. If the step is accepted -! the hessian and the gradient are recomputed to produce a new step. - -! Example: - - -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! -! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! enddo - - - -! Variables: - -! Input: -! | n | integer | m*(m-1)/2 | -! | m | integer | number of mo in the mo_class | -! | H(n,n) | double precision | Hessian | -! | v_grad(n) | double precision | Gradient | -! | W(n,n) | double precision | Eigenvectors of the hessian | -! | e_val(n) | double precision | Eigenvalues of the hessian | -! | criterion | double precision | Actual criterion | -! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -! | rho | double precision | Given by is_step_cancel_trus_region | -! | | | Agreement between the real function and the Taylor series (2nd order) | -! | nb_iter | integer | Actual number of iterations | - -! Input/output: -! | delta | double precision | Radius of the trust region | - -! Output: -! | criterion_model | double precision | Predicted criterion after the rotation | -! | x(n) | double precision | Step | -! | must_exit | logical | If the program must exit the loop | - - -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine - - - -! Variables: - -! Input: -! | nb_iter | integer | actual number of iterations | -! | prev_criterion | double precision | criterion before the application of the step x | -! | criterion | double precision | criterion after the application of the step x | -! | criterion_model | double precision | predicted criterion after the application of x | - -! Output: -! | rho | double precision | Agreement between the predicted criterion and the real new criterion | -! | cancel_step | logical | If the step must be cancelled | - - -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/algo_trust.org deleted file mode 100644 index aa836f98..00000000 --- a/src/utils_trust_region/algo_trust.org +++ /dev/null @@ -1,593 +0,0 @@ -* Algorithm for the trust region - -step_in_trust_region: -Computes the step in the trust region (delta) -(automatically sets at the iteration 0 and which evolves during the -process in function of the evolution of rho). The step is computing by -constraining its norm with a lagrange multiplier. -Since the calculation of the step is based on the Newton method, an -estimation of the gain in energy is given using the Taylors series -truncated at the second order (criterion_model). -If (DABS(criterion-criterion_model) < 1d-12) then - must_exit = .True. -else - must_exit = .False. - -This estimation of the gain in energy is used by -is_step_cancel_trust_region to say if the step is accepted or cancelled. - -If the step must be cancelled, the calculation restart from the same -hessian and gradient and recomputes the step but in a smaller trust -region and so on until the step is accepted. If the step is accepted -the hessian and the gradient are recomputed to produce a new step. - -Example: - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -! !### Initialization ### -! delta = 0d0 -! nb_iter = 0 ! Must start at 0 !!! -! rho = 0.5d0 -! not_converged = .True. -! -! ! ### TODO ### -! ! Compute the criterion before the loop -! call #your_criterion(prev_criterion) -! -! do while (not_converged) -! ! ### TODO ## -! ! Call your gradient -! ! Call you hessian -! call #your_gradient(v_grad) (1D array) -! call #your_hessian(H) (2D array) -! -! ! ### TODO ### -! ! Diagonalization of the hessian -! call diagonalization_hessian(n,H,e_val,w) -! -! cancel_step = .True. ! To enter in the loop just after -! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho -! do while (cancel_step) -! -! ! Hessian,gradient,Criterion -> x -! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) -! -! if (must_exit) then -! ! ### Message ### -! ! if step_in_trust_region sets must_exit on true for numerical reasons -! print*,'algo_trust1 sends the message : Exit' -! !### exit ### -! endif -! -! !### TODO ### -! ! Compute x -> m_x -! ! Compute m_x -> R -! ! Apply R and keep the previous MOs... -! ! Update/touch -! ! Compute the new criterion/energy -> criterion -! -! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) -! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) -! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) -! -! TOUCH #your_variables -! -! call #your_criterion(criterion) -! -! ! Criterion -> step accepted or rejected -! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) -! -! ! ### TODO ### -! !if (cancel_step) then -! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) -! !endif -! #if (cancel_step) then -! #mo_coef = prev_mos -! #endif -! -! enddo -! -! !call save_mos() !### depend of the time for 1 iteration -! -! ! To exit the external loop if must_exit = .True. -! if (must_exit) then -! !### exit ### -! endif -! -! ! Step accepted, nb iteration + 1 -! nb_iter = nb_iter + 1 -! -! ! ### TODO ### -! !if (###Conditions###) then -! ! no_converged = .False. -! !endif -! #if (#your_conditions) then -! # not_converged = .False. -! #endif -! -! enddo -#+END_SRC - -Variables: - -Input: -| n | integer | m*(m-1)/2 | -| m | integer | number of mo in the mo_class | -| H(n,n) | double precision | Hessian | -| v_grad(n) | double precision | Gradient | -| W(n,n) | double precision | Eigenvectors of the hessian | -| e_val(n) | double precision | Eigenvalues of the hessian | -| criterion | double precision | Actual criterion | -| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | -| rho | double precision | Given by is_step_cancel_trus_region | -| | | Agreement between the real function and the Taylor series (2nd order) | -| nb_iter | integer | Actual number of iterations | - -Input/output: -| delta | double precision | Radius of the trust region | - -Output: -| criterion_model | double precision | Predicted criterion after the rotation | -| x(n) | double precision | Step | -| must_exit | logical | If the program must exit the loop | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) - - include 'pi.h' - - BEGIN_DOC - ! Compute the step and the expected criterion/energy after the step - END_DOC - - implicit none - - ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) - double precision, intent(in) :: rho, prev_criterion - - ! inout - double precision, intent(inout) :: delta, e_val(n) - - ! out - double precision, intent(out) :: criterion_model, x(n) - logical, intent(out) :: must_exit - - ! internal - integer :: info - - must_exit = .False. - - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) - - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) - - ! exit if DABS(prev_criterion - criterion_model) < 1d-12 - if (DABS(prev_criterion - criterion_model) < thresh_model) then - print*,'' - print*,'###############################################################################' - print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' - print*,'###############################################################################' - print*,'' - must_exit = .True. - endif - - if (delta < thresh_delta) then - print*,'' - print*,'##############################################' - print*,'Delta <', thresh_delta, 'stop the trust region' - print*,'##############################################' - print*,'' - must_exit = .True. - endif - - ! Add after the call to this subroutine, a statement: - ! "if (must_exit) then - ! exit - ! endif" - ! in order to exit the optimization loop - -end subroutine -#+END_SRC - -Variables: - -Input: -| nb_iter | integer | actual number of iterations | -| prev_criterion | double precision | criterion before the application of the step x | -| criterion | double precision | criterion after the application of the step x | -| criterion_model | double precision | predicted criterion after the application of x | - -Output: -| rho | double precision | Agreement between the predicted criterion and the real new criterion | -| cancel_step | logical | If the step must be cancelled | - -#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) - - include 'pi.h' - - BEGIN_DOC - ! Compute if the step should be cancelled - END_DOC - - implicit none - - ! in - double precision, intent(in) :: prev_criterion, criterion, criterion_model - - ! inout - integer, intent(inout) :: nb_iter - - ! out - logical, intent(out) :: cancel_step - double precision, intent(out) :: rho - - ! Computes rho - call trust_region_rho(prev_criterion,criterion,criterion_model,rho) - - if (nb_iter == 0) then - nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled - endif - - ! If rho < thresh_rho -> give something in output to cancel the step - if (rho >= thresh_rho) then !0.1d0) then - ! The step is accepted - cancel_step = .False. - else - ! The step is rejected - cancel_step = .True. - print*, '***********************' - print*, 'Step cancel : rho <', thresh_rho - print*, '***********************' - endif - -end subroutine -#+END_SRC - -** Template for MOs -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt -subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) - double precision, allocatable :: prev_mos(:,:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation - integer :: nb_iter, info, nb_sub_iter - integer :: i,j,tmp_i,tmp_j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) - allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) - allocate(prev_mos(ao_num, mo_num)) - - ! Provide the criterion, but unnecessary because it's done - ! automatically - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - ! The new hessian and gradient are computed at the end of the previous iteration - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! 1D tmp -> 2D tmp - call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) - - ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) - call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) - - if (enforce_step_cancellation) then - print*, 'Forces the step cancellation, too large error in the rotation matrix' - rho = 0d0 - cycle - endif - - ! tmp_R to R, subspace to full space - call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) - - ! Rotation of the MOs - call apply_mo_rotation(R, prev_mos) - - ! touch mo_coef - call clear_mo_map ! Only if you are using the bi-electronic integrals - ! mo_coef becomes valid - ! And avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! To update the other parameters if needed - call #update_parameters() - - ! To enforce the program to provide new criterion after the update - ! of the parameters - FREE C_PROVIDER - PROVIDE C_PROVIDER - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancellation of the step ? - if (cancel_step) then - ! Replacement by the previous MOs - mo_coef = prev_mos - ! call save_mos() ! depends of the time for 1 iteration - - ! No need to clear_mo_map since we don't recompute the gradient and the hessian - ! mo_coef becomes valid - ! Avoid the recomputation of the providers which depend of mo_coef - TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - else - ! The step is accepted: - ! criterion -> prev criterion - - ! The replacement "criterion -> prev criterion" is already done - ! in trust_region_rho, so if the criterion does not have a reason - ! to change, it will change nothing for the criterion and will - ! force the program to provide the new hessian, gradient and - ! convergence criterion for the next iteration. - ! But in the case of orbital optimization we diagonalize the CI - ! matrix after the "FREE" statement, so the criterion will change - - FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - prev_criterion = C_PROVIDER - - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! call save_mos() ! depends of the time for 1 iteration - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - ! Provide the convergence criterion - ! Provide the gradient and the hessian for the next iteration - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. - endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. - endif - - enddo - - ! Save the final MOs - call save_mos() - - ! Diagonalization of the hessian - ! (To see the eigenvalues at the end of the optimization) - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) - -end -#+END_SRC - -** Cartesian version -#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt -subroutine algo_trust_cartesian_template(tmp_n) - - implicit none - - ! Variables - - ! In - integer, intent(in) :: tmp_n - - ! Out - ! Rien ou un truc pour savoir si ça c'est bien passé - - ! Internal - double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) - double precision :: criterion, prev_criterion, criterion_model - double precision :: delta, rho - logical :: not_converged, cancel_step, must_exit - integer :: nb_iter, nb_sub_iter - integer :: i,j - - allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) - - PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER - - ! Initialization - delta = 0d0 - nb_iter = 0 ! Must start at 0 !!! - rho = 0.5d0 ! Must start at 0.5 - not_converged = .True. ! Must be true - - ! Compute the criterion before the loop - prev_criterion = C_PROVIDER - - do while (not_converged) - - print*,'' - print*,'******************' - print*,'Iteration', nb_iter - print*,'******************' - print*,'' - - if (nb_iter > 0) then - PROVIDE H_PROVIDER g_PROVIDER - endif - - ! Diagonalization of the hessian - call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) - - cancel_step = .True. ! To enter in the loop just after - nb_sub_iter = 0 - - ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho - do while (cancel_step) - - print*,'-----------------------------' - print*,'Iteration:', nb_iter - print*,'Sub iteration:', nb_sub_iter - print*,'-----------------------------' - - ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & - prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) - - if (must_exit) then - ! if step_in_trust_region sets must_exit on true for numerical reasons - print*,'trust_region_step_w_expected_e sent the message : Exit' - exit - endif - - ! New coordinates, check the sign - X_PROVIDER = X_PROVIDER - tmp_x - - ! touch X_PROVIDER - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER - - ! To update the other parameters if needed - call #update_parameters() - - ! New criterion - PROVIDE C_PROVIDER ! Unnecessary - criterion = C_PROVIDER - - ! Criterion -> step accepted or rejected - call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) - - ! Cancel the previous step - if (cancel_step) then - ! Replacement by the previous coordinates, check the sign - X_PROVIDER = X_PROVIDER + tmp_x - - ! Avoid the recomputation of the hessian and the gradient - TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER - endif - - nb_sub_iter = nb_sub_iter + 1 - enddo - - ! To exit the external loop if must_exit = .True. - if (must_exit) then - exit - endif - - ! Step accepted, nb iteration + 1 - nb_iter = nb_iter + 1 - - PROVIDE cc_PROVIDER - - ! To exit - if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then - not_converged = .False. - endif - - if (nb_iter > optimization_max_nb_iter) then - not_converged = .False. - endif - - if (delta < thresh_delta) then - not_converged = .False. - endif - - enddo - - deallocate(e_val, W, tmp_x) - -end -#+END_SRC - -** Script template -#+BEGIN_SRC bash :tangle script_template_mos.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - -#+BEGIN_SRC bash :tangle script_template_xyz.sh -#!/bin/bash - -your_file= - -your_C_PROVIDER= -your_X_PROVIDER= -your_H_PROVIDER= -your_g_PROVIDER= -your_cc_PROVIDER= - -sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file -sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file -sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file -sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file -sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file -#+END_SRC - diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f deleted file mode 100644 index e274ec11..00000000 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ /dev/null @@ -1,85 +0,0 @@ -! Apply MO rotation -! Subroutine to apply the rotation matrix to the coefficients of the -! MOs. - -! New MOs = Old MOs . Rotation matrix - -! *Compute the new MOs with the previous MOs and a rotation matrix* - -! Provided: -! | mo_num | integer | number of MOs | -! | ao_num | integer | number of AOs | -! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -! Intent in: -! | R(mo_num,mo_num) | double precision | rotation matrix | - -! Intent out: -! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -! Internal: -! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -! | i,j | integer | indexes | - -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/apply_mo_rotation.org deleted file mode 100644 index 918581b7..00000000 --- a/src/utils_trust_region/apply_mo_rotation.org +++ /dev/null @@ -1,86 +0,0 @@ -* Apply MO rotation -Subroutine to apply the rotation matrix to the coefficients of the -MOs. - -New MOs = Old MOs . Rotation matrix - -*Compute the new MOs with the previous MOs and a rotation matrix* - -Provided: -| mo_num | integer | number of MOs | -| ao_num | integer | number of AOs | -| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | - -Intent in: -| R(mo_num,mo_num) | double precision | rotation matrix | - -Intent out: -| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | - -Internal: -| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | -| i,j | integer | indexes | -#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f -subroutine apply_mo_rotation(R,prev_mos) - - include 'pi.h' - - BEGIN_DOC - ! Compute the new MOs knowing the rotation matrix - END_DOC - - implicit none - - ! Variables - - ! in - double precision, intent(in) :: R(mo_num,mo_num) - - ! out - double precision, intent(out) :: prev_mos(ao_num,mo_num) - - ! internal - double precision, allocatable :: new_mos(:,:) - integer :: i,j - double precision :: t1,t2,t3 - - print*,'' - print*,'---apply_mo_rotation---' - - call wall_time(t1) - - ! Allocation - allocate(new_mos(ao_num,mo_num)) - - ! Calculation - - ! Product of old MOs (mo_coef) by Rotation matrix (R) - call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) - - prev_mos = mo_coef - mo_coef = new_mos - - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif - - ! Save the new MOs and change the label - mo_label = 'MCSCF' - !call save_mos - call ezfio_set_determinants_mo_label(mo_label) - - !print*,'Done, MOs saved' - - ! Deallocation, end - deallocate(new_mos) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in apply mo rotation:', t3 - print*,'---End apply_mo_rotation---' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f deleted file mode 100644 index 35e12232..00000000 --- a/src/utils_trust_region/mat_to_vec_index.irp.f +++ /dev/null @@ -1,61 +0,0 @@ -! Matrix to vector index - -! *Compute the index i of a vector element from the indexes p,q of a -! matrix element* - -! Lower diagonal matrix (p,q), p > q -> vector (i) - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | p,q | integer | indexes of a matrix element in the lower diagonal | -! | | | p > q, q -> column | -! | | | p -> row, | -! | | | q -> column | - -! Input: -! | i | integer | corresponding index in the vector | - - -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/mat_to_vec_index.org deleted file mode 100644 index 50840584..00000000 --- a/src/utils_trust_region/mat_to_vec_index.org +++ /dev/null @@ -1,63 +0,0 @@ -* Matrix to vector index - -*Compute the index i of a vector element from the indexes p,q of a -matrix element* - -Lower diagonal matrix (p,q), p > q -> vector (i) - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| p,q | integer | indexes of a matrix element in the lower diagonal | -| | | p > q, q -> column | -| | | p -> row, | -| | | q -> column | - -Input: -| i | integer | corresponding index in the vector | - -#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f -subroutine mat_to_vec_index(p,q,i) - - include 'pi.h' - - implicit none - - ! Variables - - ! in - integer, intent(in) :: p,q - - ! out - integer, intent(out) :: i - - ! internal - integer :: a,b - double precision :: da - - ! Calculation - - a = p-1 - b = a*(a-1)/2 - - i = q+b - -end subroutine -#+END_SRC - diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h deleted file mode 100644 index bbfabfec..00000000 --- a/src/utils_trust_region/pi.h +++ /dev/null @@ -1,2 +0,0 @@ - !logical, parameter :: debug=.False. - double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f deleted file mode 100644 index 4738fd67..00000000 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ /dev/null @@ -1,443 +0,0 @@ -! Rotation matrix - -! *Build a rotation matrix from an antisymmetric matrix* - -! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -! $$ -! \textbf{R}=\exp(\textbf{A}) -! $$ - -! So : -! \begin{align*} -! \textbf{R}=& \exp(\textbf{A}) \\ -! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -! \end{align*} - -! With : -! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -! $\tau$ : $\sqrt{-x}$ -! $x$ : eigenvalues of $\textbf{A}^2$ - -! Input: -! | A(n,n) | double precision | antisymmetric matrix | -! | n | integer | number of columns of the A matrix | -! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -! Output: -! | R(n,n) | double precision | Rotation matrix | -! | info | integer | if info = 0, the execution is successful | -! | | | if info = k, the k-th parameter has an illegal value | -! | | | if info = -k, the algorithm failed | - -! Internal: -! | B(n,n) | double precision | B = A.A | -! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -! | W(n,n) | double precision | eigenvectors of B | -! | e_val(n) | double precision | eigenvalues of B | -! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | -! | part_1a(n,n) | double precision | matrix cos_tau.W^t | -! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -! | part_2a(n,n) | double precision | matrix W^t.A | -! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -! | norm | integer | norm of R.R^t-1, must be equal to 0 | -! | i,j | integer | indexes | - -! Functions: -! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | -! | disnan | logical | Lapack function, check if an element is NaN | - - - -subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) - -! Pre-conditions - -! Initialization -info=0 -enforce_step_cancellation = .False. - -! Size of matrix A must be at least 1 by 1 -if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return -endif - -! Leading dimension of A must be >= n -if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return -endif - -! Leading dimension of A must be >= n -if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return -endif - -! Matrix elements of A must by non-NaN -do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo -enddo - -do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif -enddo - -do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo -enddo - -! Fix for too big elements ! bad idea better to cancel if the error is too big -!do j = 1, n -! do i = 1, n -! A(i,j) = mod(A(i,j),2d0*pi) -! if (dabs(A(i,j)) > pi) then -! A(i,j) = 0d0 -! endif -! enddo -!enddo - -max_elem_A = 0d0 -do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo -enddo -print*,'max element in A', max_elem_A - -if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' -endif - -! B=A.A -! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ -! - Diagonalization of $\textbf{B}$ -! W, the eigenvectors -! e_val, the eigenvalues - - -! Compute B=A.A - -call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - -! Copy B in W, diagonalization will put the eigenvectors in W -W=B - -! Diagonalization of B -! Eigenvalues -> e_val -! Eigenvectors -> W -lwork = 3*n-1 -allocate(work(lwork,n)) - -print*,'Starting diagonalization ...' - -call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - -deallocate(work) - -if (info2 == 0) then - print*, 'Diagonalization : Done' -elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' -else - print*, "WARNING: Diagonalization failed to converge" -endif - -! Tau^-1, cos(tau), sin(tau) -! $$\tau = \sqrt{-x}$$ -! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ -! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ -! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ -! These matrices are diagonals - -! Diagonal matrix m_diag -do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif -enddo - -m_diag = 0.d0 -do i = 1, n - m_diag(i,i) = e_val(i) -enddo - -! cos_tau -do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo -enddo - -! sin_tau -do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo -enddo - -! Debug, display the cos_tau and sin_tau matrix -!if (debug) then -! print*, 'cos_tau' -! do i = 1, n -! print*, cos_tau(i,:) -! enddo -! print*, 'sin_tau' -! do i = 1, n -! print*, sin_tau(i,:) -! enddo -!endif - -! tau^-1 -do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo -enddo - -max_elem = 0d0 -do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif -enddo -print*,'max elem tau^-1:', max_elem - -! Debug -!print*,'eigenvalues:' -!do i = 1, n -! print*, e_val(i) -!enddo - -!Debug, display tau^-1 -!if (debug) then -! print*, 'tau^-1' -! do i = 1, n -! print*,tau_m1(i,:) -! enddo -!endif - -! Rotation matrix -! \begin{align*} -! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} -! \begin{align*} -! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} -! \end{align*} -! \begin{align*} -! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} -! \end{align*} - -! First: -! part_1 = dgemm(W, dgemm(cos_tau, W^t)) -! part_1a = dgemm(cos_tau, W^t) -! part_1 = dgemm(W, part_1a) -! And: -! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) -! part_2a = dgemm(W^t, A) -! part_2b = dgemm(sin_tau, part_2a) -! part_2c = dgemm(tau_m1, part_2b) -! part_2 = dgemm(W, part_2c) -! Finally: -! Rotation matrix, R = part_1+part_2 - -! If $R$ is a rotation matrix: -! $R.R^T=R^T.R=\textbf{1}$ - -! part_1 -call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - -! part_2 -call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) -call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) -call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) -call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - -! Rotation matrix R -R = part_1 + part_2 - -! Matrix check -! R.R^t and R^t.R must be equal to identity matrix -do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo -enddo - -call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - -norm = dnrm2(n*n,RR_t,1) -print*, 'Rotation matrix check, norm R.R^T = ', norm - -! Debug -!if (debug) then -! print*, 'RR_t' -! do i = 1, n -! print*, RR_t(i,:) -! enddo -!endif - -! Post conditions - -! Check if R.R^T=1 -max_elem = 0d0 -do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo -enddo - -print*, 'Max error in R.R^T:', max_elem -print*, 'e_val(1):', e_val(1) -print*, 'e_val(n):', e_val(n) -print*, 'max elem in A:', max_elem_A - -if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. -endif - -! Matrix elements of R must by non-NaN -do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo -enddo - -! Display -!if (debug) then -! print*,'Rotation matrix :' -! do i = 1, n -! write(*,'(100(F10.5))') R(i,:) -! enddo -!endif - -! Deallocation, end - -deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/rotation_matrix.org deleted file mode 100644 index 73ba0298..00000000 --- a/src/utils_trust_region/rotation_matrix.org +++ /dev/null @@ -1,454 +0,0 @@ -* Rotation matrix - -*Build a rotation matrix from an antisymmetric matrix* - -Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : -$$ -\textbf{R}=\exp(\textbf{A}) -$$ - -So : -\begin{align*} -\textbf{R}=& \exp(\textbf{A}) \\ -=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ -=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} -\end{align*} - -With : -$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ -$\tau$ : $\sqrt{-x}$ -$x$ : eigenvalues of $\textbf{A}^2$ - -Input: -| A(n,n) | double precision | antisymmetric matrix | -| n | integer | number of columns of the A matrix | -| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | -| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | - -Output: -| R(n,n) | double precision | Rotation matrix | -| info | integer | if info = 0, the execution is successful | -| | | if info = k, the k-th parameter has an illegal value | -| | | if info = -k, the algorithm failed | - -Internal: -| B(n,n) | double precision | B = A.A | -| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | -| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | -| W(n,n) | double precision | eigenvectors of B | -| e_val(n) | double precision | eigenvalues of B | -| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | -| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | -| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | -| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | -| part_1(n,n) | double precision | matrix W.cos_tau.W^t | -| part_1a(n,n) | double precision | matrix cos_tau.W^t | -| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | -| part_2a(n,n) | double precision | matrix W^t.A | -| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | -| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | -| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | -| norm | integer | norm of R.R^t-1, must be equal to 0 | -| i,j | integer | indexes | - -Functions: -| dnrm2 | double precision | Lapack function, compute the norm of a matrix | -| disnan | logical | Lapack function, check if an element is NaN | - - -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f -subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) - - implicit none - - BEGIN_DOC - ! Rotation matrix to rotate the molecular orbitals. - ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC - - include 'pi.h' - - ! Variables - - ! in - integer, intent(in) :: n,LDA,LDR - double precision, intent(inout) :: A(LDA,n) - - ! out - double precision, intent(out) :: R(LDR,n) - integer, intent(out) :: info - logical, intent(out) :: enforce_step_cancellation - - ! internal - double precision, allocatable :: B(:,:) - double precision, allocatable :: work(:,:) - double precision, allocatable :: W(:,:), e_val(:) - double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) - double precision, allocatable :: part_1(:,:),part_1a(:,:) - double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) - double precision, allocatable :: RR_t(:,:) - integer :: i,j - integer :: info2, lwork ! for dsyev - double precision :: norm, max_elem, max_elem_A, t1,t2,t3 - - ! function - double precision :: dnrm2 - logical :: disnan - - print*,'' - print*,'---rotation_matrix---' - - call wall_time(t1) - - ! Allocation - allocate(B(n,n)) - allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) - allocate(W(n,n),e_val(n)) - allocate(part_1(n,n),part_1a(n,n)) - allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) - allocate(RR_t(n,n)) -#+END_SRC - -** Pre-conditions -#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Initialization - info=0 - enforce_step_cancellation = .False. - - ! Size of matrix A must be at least 1 by 1 - if (n<1) then - info = 3 - print*, 'WARNING: invalid parameter 5' - print*, 'n<1' - return - endif - - ! Leading dimension of A must be >= n - if (LDA < n) then - info = 25 - print*, 'WARNING: invalid parameter 2 or 5' - print*, 'LDA < n' - return - endif - - ! Leading dimension of A must be >= n - if (LDR < n) then - info = 4 - print*, 'WARNING: invalid parameter 4' - print*, 'LDR < n' - return - endif - - ! Matrix elements of A must by non-NaN - do j = 1, n - do i = 1, n - if (disnan(A(i,j))) then - info=1 - print*, 'WARNING: invalid parameter 1' - print*, 'NaN element in A matrix' - return - endif - enddo - enddo - - do i = 1, n - if (A(i,i) /= 0d0) then - print*, 'WARNING: matrix A is not antisymmetric' - print*, 'Non 0 element on the diagonal', i, A(i,i) - call ABORT - endif - enddo - - do j = 1, n - do i = 1, n - if (A(i,j)+A(j,i)>1d-16) then - print*, 'WANRING: matrix A is not antisymmetric' - print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) - print*, 'diff:', A(i,j)+A(j,i) - call ABORT - endif - enddo - enddo - - ! Fix for too big elements ! bad idea better to cancel if the error is too big - !do j = 1, n - ! do i = 1, n - ! A(i,j) = mod(A(i,j),2d0*pi) - ! if (dabs(A(i,j)) > pi) then - ! A(i,j) = 0d0 - ! endif - ! enddo - !enddo - - max_elem_A = 0d0 - do j = 1, n - do i = 1, n - if (ABS(A(i,j)) > ABS(max_elem_A)) then - max_elem_A = A(i,j) - endif - enddo - enddo - print*,'max element in A', max_elem_A - - if (ABS(max_elem_A) > 2 * pi) then - print*,'' - print*,'WARNING: ABS(max_elem_A) > 2 pi ' - print*,'' - endif - -#+END_SRC - -** Calculations - -*** B=A.A - - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ - - Diagonalization of $\textbf{B}$ - W, the eigenvectors - e_val, the eigenvalues - - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Compute B=A.A - - call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) - - ! Copy B in W, diagonalization will put the eigenvectors in W - W=B - - ! Diagonalization of B - ! Eigenvalues -> e_val - ! Eigenvectors -> W - lwork = 3*n-1 - allocate(work(lwork,n)) - - print*,'Starting diagonalization ...' - - call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) - - deallocate(work) - - if (info2 == 0) then - print*, 'Diagonalization : Done' - elseif (info2 < 0) then - print*, 'WARNING: error in the diagonalization' - print*, 'Illegal value of the ', info2,'-th parameter' - else - print*, "WARNING: Diagonalization failed to converge" - endif - #+END_SRC - -*** Tau^-1, cos(tau), sin(tau) - $$\tau = \sqrt{-x}$$ - - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ - - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ - - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ - These matrices are diagonals - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Diagonal matrix m_diag - do j = 1, n - if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems - e_val(j) = 0.d0 - else - e_val(j) = - e_val(j) - endif - enddo - - m_diag = 0.d0 - do i = 1, n - m_diag(i,i) = e_val(i) - enddo - - ! cos_tau - do j = 1, n - do i = 1, n - if (i==j) then - cos_tau(i,j) = dcos(dsqrt(e_val(i))) - else - cos_tau(i,j) = 0d0 - endif - enddo - enddo - - ! sin_tau - do j = 1, n - do i = 1, n - if (i==j) then - sin_tau(i,j) = dsin(dsqrt(e_val(i))) - else - sin_tau(i,j) = 0d0 - endif - enddo - enddo - - ! Debug, display the cos_tau and sin_tau matrix - !if (debug) then - ! print*, 'cos_tau' - ! do i = 1, n - ! print*, cos_tau(i,:) - ! enddo - ! print*, 'sin_tau' - ! do i = 1, n - ! print*, sin_tau(i,:) - ! enddo - !endif - - ! tau^-1 - do j = 1, n - do i = 1, n - if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small - tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) - else - tau_m1(i,j) = 0d0 - endif - enddo - enddo - - max_elem = 0d0 - do i = 1, n - if (ABS(tau_m1(i,i)) > ABS(max_elem)) then - max_elem = tau_m1(i,i) - endif - enddo - print*,'max elem tau^-1:', max_elem - - ! Debug - !print*,'eigenvalues:' - !do i = 1, n - ! print*, e_val(i) - !enddo - - !Debug, display tau^-1 - !if (debug) then - ! print*, 'tau^-1' - ! do i = 1, n - ! print*,tau_m1(i,:) - ! enddo - !endif - #+END_SRC - -*** Rotation matrix - \begin{align*} - \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - \begin{align*} - \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} - \end{align*} - \begin{align*} - \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} - \end{align*} - - First: - part_1 = dgemm(W, dgemm(cos_tau, W^t)) - part_1a = dgemm(cos_tau, W^t) - part_1 = dgemm(W, part_1a) - And: - part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) - part_2a = dgemm(W^t, A) - part_2b = dgemm(sin_tau, part_2a) - part_2c = dgemm(tau_m1, part_2b) - part_2 = dgemm(W, part_2c) - Finally: - Rotation matrix, R = part_1+part_2 - - If $R$ is a rotation matrix: - $R.R^T=R^T.R=\textbf{1}$ - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! part_1 - call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) - - ! part_2 - call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) - call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) - call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) - call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) - - ! Rotation matrix R - R = part_1 + part_2 - - ! Matrix check - ! R.R^t and R^t.R must be equal to identity matrix - do j = 1, n - do i=1,n - if (i==j) then - RR_t(i,j) = 1d0 - else - RR_t(i,j) = 0d0 - endif - enddo - enddo - - call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) - - norm = dnrm2(n*n,RR_t,1) - print*, 'Rotation matrix check, norm R.R^T = ', norm - - ! Debug - !if (debug) then - ! print*, 'RR_t' - ! do i = 1, n - ! print*, RR_t(i,:) - ! enddo - !endif - #+END_SRC - -*** Post conditions - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - ! Check if R.R^T=1 - max_elem = 0d0 - do j = 1, n - do i = 1, n - if (ABS(RR_t(i,j)) > ABS(max_elem)) then - max_elem = RR_t(i,j) - endif - enddo - enddo - - print*, 'Max error in R.R^T:', max_elem - print*, 'e_val(1):', e_val(1) - print*, 'e_val(n):', e_val(n) - print*, 'max elem in A:', max_elem_A - - if (ABS(max_elem) > 1d-12) then - print*, 'WARNING: max error in R.R^T > 1d-12' - print*, 'Enforce the step cancellation' - enforce_step_cancellation = .True. - endif - - ! Matrix elements of R must by non-NaN - do j = 1,n - do i = 1,LDR - if (disnan(R(i,j))) then - info = 666 - print*, 'NaN in rotation matrix' - call ABORT - endif - enddo - enddo - - ! Display - !if (debug) then - ! print*,'Rotation matrix :' - ! do i = 1, n - ! write(*,'(100(F10.5))') R(i,:) - ! enddo - !endif - #+END_SRC - -** Deallocation, end - #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f - deallocate(B) - deallocate(m_diag,cos_tau,sin_tau,tau_m1) - deallocate(W,e_val) - deallocate(part_1,part_1a) - deallocate(part_2,part_2a,part_2b,part_2c) - deallocate(RR_t) - - call wall_time(t2) - t3 = t2-t1 - print*,'Time in rotation matrix:', t3 - - print*,'---End rotation_matrix---' - -end subroutine - #+END_SRC - diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f deleted file mode 100644 index bdd1f6ba..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ /dev/null @@ -1,64 +0,0 @@ -! Rotation matrix in a subspace to rotation matrix in the full space - -! Usually, we are using a list of MOs, for exemple the active ones. When -! we compute a rotation matrix to rotate the MOs, we just compute a -! rotation matrix for these MOs in order to reduce the size of the -! matrix which has to be computed. Since the computation of a rotation -! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -! reuce the number of MOs involved. -! After that we replace the rotation matrix in the full space by -! building the elements of the rotation matrix in the full space from -! the elements of the rotation matrix in the subspace and adding some 0 -! on the extradiagonal elements and some 1 on the diagonal elements, -! for the MOs that are not involved in the rotation. - -! Provided: -! | mo_num | integer | Number of MOs | - -! Input: -! | m | integer | Size of tmp_list, m <= mo_num | -! | tmp_list(m) | integer | List of MOs | -! | tmp_R(m,m) | double precision | Rotation matrix in the space of | -! | | | the MOs containing by tmp_list | - -! Output: -! | R(mo_num,mo_num | double precision | Rotation matrix in the space | -! | | | of all the MOs | - -! Internal: -! | i,j | integer | indexes in the full space | -! | tmp_i,tmp_j | integer | indexes in the subspace | - - -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/sub_to_full_rotation_matrix.org deleted file mode 100644 index 16434dc8..00000000 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ /dev/null @@ -1,65 +0,0 @@ -* Rotation matrix in a subspace to rotation matrix in the full space - -Usually, we are using a list of MOs, for exemple the active ones. When -we compute a rotation matrix to rotate the MOs, we just compute a -rotation matrix for these MOs in order to reduce the size of the -matrix which has to be computed. Since the computation of a rotation -matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to -reuce the number of MOs involved. -After that we replace the rotation matrix in the full space by -building the elements of the rotation matrix in the full space from -the elements of the rotation matrix in the subspace and adding some 0 -on the extradiagonal elements and some 1 on the diagonal elements, -for the MOs that are not involved in the rotation. - -Provided: -| mo_num | integer | Number of MOs | - -Input: -| m | integer | Size of tmp_list, m <= mo_num | -| tmp_list(m) | integer | List of MOs | -| tmp_R(m,m) | double precision | Rotation matrix in the space of | -| | | the MOs containing by tmp_list | - -Output: -| R(mo_num,mo_num | double precision | Rotation matrix in the space | -| | | of all the MOs | - -Internal: -| i,j | integer | indexes in the full space | -| tmp_i,tmp_j | integer | indexes in the subspace | - -#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f -subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - - BEGIN_DOC - ! Compute the full rotation matrix from a smaller one - END_DOC - - implicit none - - ! in - integer, intent(in) :: m, tmp_list(m) - double precision, intent(in) :: tmp_R(m,m) - - ! out - double precision, intent(out) :: R(mo_num,mo_num) - - ! internal - integer :: i,j,tmp_i,tmp_j - - ! tmp_R to R, subspace to full space - R = 0d0 - do i = 1, mo_num - R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital - enddo - do tmp_j = 1, m - j = tmp_list(tmp_j) - do tmp_i = 1, m - i = tmp_list(tmp_i) - R(i,j) = tmp_R(tmp_i,tmp_j) - enddo - enddo - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f deleted file mode 100644 index b7d849d1..00000000 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ /dev/null @@ -1,119 +0,0 @@ -! Predicted energy : e_model - -! *Compute the energy predicted by the Taylor series* - -! The energy is predicted using a Taylor expansion truncated at te 2nd -! order : - -! \begin{align*} -! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -! \end{align*} - -! Input: -! | n | integer | m*(m-1)/2 | -! | v_grad(n) | double precision | gradient | -! | H(n,n) | double precision | hessian | -! | x(n) | double precision | Step in the trust region | -! | prev_energy | double precision | previous energy | - -! Output: -! | e_model | double precision | predicted energy after the rotation of the MOs | - -! Internal: -! | part_1 | double precision | v_grad^T.x | -! | part_2 | double precision | 1/2 . x^T.H.x | -! | part_2a | double precision | H.x | -! | i,j | integer | indexes | - -! Function: -! | ddot | double precision | dot product (Lapack) | - - -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) - -! Calculations - -! part_1 corresponds to the product g.x -! part_2a corresponds to the product H.x -! part_2 corresponds to the product 0.5*(x^T.H.x) - -! TODO: remove the dot products - - -! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/trust_region_expected_e.org deleted file mode 100644 index 58c8f804..00000000 --- a/src/utils_trust_region/trust_region_expected_e.org +++ /dev/null @@ -1,121 +0,0 @@ -* Predicted energy : e_model - -*Compute the energy predicted by the Taylor series* - -The energy is predicted using a Taylor expansion truncated at te 2nd -order : - -\begin{align*} -E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) -\end{align*} - -Input: -| n | integer | m*(m-1)/2 | -| v_grad(n) | double precision | gradient | -| H(n,n) | double precision | hessian | -| x(n) | double precision | Step in the trust region | -| prev_energy | double precision | previous energy | - -Output: -| e_model | double precision | predicted energy after the rotation of the MOs | - -Internal: -| part_1 | double precision | v_grad^T.x | -| part_2 | double precision | 1/2 . x^T.H.x | -| part_2a | double precision | H.x | -| i,j | integer | indexes | - -Function: -| ddot | double precision | dot product (Lapack) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) - - include 'pi.h' - - BEGIN_DOC - ! Compute the expected criterion/energy after the application of the step x - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) - double precision, intent(in) :: prev_energy - - ! out - double precision, intent(out) :: e_model - - ! internal - double precision :: part_1, part_2, t1,t2,t3 - double precision, allocatable :: part_2a(:) - - integer :: i,j - - !Function - double precision :: ddot - - print*,'' - print*,'---Trust_e_model---' - - call wall_time(t1) - - ! Allocation - allocate(part_2a(n)) -#+END_SRC - -** Calculations - -part_1 corresponds to the product g.x -part_2a corresponds to the product H.x -part_2 corresponds to the product 0.5*(x^T.H.x) - -TODO: remove the dot products - -#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f - ! Product v_grad.x - part_1 = ddot(n,v_grad,1,x,1) - - !if (debug) then - print*,'g.x : ', part_1 - !endif - - ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) - - ! Product 1/2 . x^T.H.x - part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) - - !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 - !endif - - print*,'prev_energy', prev_energy - - ! Sum - e_model = prev_energy + part_1 + part_2 - - ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model - - ! Can be deleted, already in another subroutine - if (DABS(prev_energy - e_model) < 1d-12 ) then - print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' - endif - - ! Deallocation - deallocate(part_2a) - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust e model:', t3 - - print*,'---End trust_e_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f deleted file mode 100644 index f71bb405..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ /dev/null @@ -1,1655 +0,0 @@ -! Newton's method to find the optimal lambda - -! *Compute the lambda value for the trust region* - -! This subroutine uses the Newton method in order to find the optimal -! lambda. This constant is added on the diagonal of the hessian to shift -! the eiganvalues. It has a double role: -! - ensure that the resulting hessian is positive definite for the -! Newton method -! - constrain the step in the trust region, i.e., -! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius -! of the trust region. -! We search $\lambda$ which minimizes -! \begin{align*} -! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -! \end{align*} -! or -! \begin{align*} -! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -! \end{align*} -! and gives obviously 0 in both cases. \newline - -! There are several cases: -! - If $\textbf{H}$ is positive definite the interval containing the -! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$ then the interval containing -! the solution is $\lambda \in (-h_1, \infty)$. -! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ then the interval containing the solution is -! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < -! 10^{-12}$ are not computed, so the term where $i = 1$ is -! automatically removed and this case becomes similar to the previous one. - -! So to avoid numerical problems (cf. trust_region) we start the -! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -! with $\epsilon$ a little constant. -! The research must be restricted to the interval containing the -! solution. For that reason a little trust region in 1D is used. - -! The Newton method to find the optimal $\lambda$ is : -! \begin{align*} -! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -! \end{align*} -! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -! $\lambda$ at the l-th iteration, -! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -! $\lambda$ at the l-th iteration.\newline - -! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -! \begin{align*} -! y \leq \alpha -! \end{align*} -! with $\alpha$ a scalar representing the trust length (trust region in -! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -! Taylor series truncated at the second order. Thus, if $y > \alpha$, -! the constraint is applied as -! \begin{align*} -! y^* = \alpha \frac{y}{|y|} -! \end{align*} -! with $y^*$ the solution in the trust region. - -! The size of the trust region evolves in function of $\rho$ as for the -! trust region seen previously cf. trust_region, rho_model. -! The prediction of the value of $f$ or $\tilde{f}$ is done using the -! Taylor series truncated at the second order cf. "trust_region", -! "trust_e_model". - -! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{align*} - -! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -! 1/\Delta^2)^2$ with respect to $\lambda$ are: -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} -! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} -! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! Provided in qp_edit: -! | thresh_rho_2 | -! | thresh_cc | -! | nb_it_max_lambda | -! | version_lambda_search | -! | nb_it_max_pre_search | -! see qp_edit for more details - -! Input: -! | n | integer | m*(m-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -! | delta | double precision | delta for the trust region | - -! Output: -! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | -! | | | lambda > 0 | - -! Internal: -! | d1_N | double precision | value of d1_norm_trust_region | -! | d2_N | double precision | value of d2_norm_trust_region | -! | f_N | double precision | value of f_norm_trust_region | -! | prev_f_N | double precision | previous value of f_norm_trust_region | -! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | -! | prev_f_R | double precision | previous value of f_R | -! | model | double precision | predicted value of f_R from prev_f_R and y | -! | d_1 | double precision | value of the first derivative | -! | d_2 | double precision | value of the second derivative | -! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | -! | prev_lambda | double precision | previous value of lambda | -! | t1,t2,t3 | double precision | wall time | -! | i | integer | index | -! | epsilon | double precision | little constant to avoid numerical problem | -! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | -! | version | integer | version of the root finding method | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -! | f_norm_trust_region | double precision | value of norm(x)^2 | - - - -subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Research the optimal lambda to constrain the step size in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(inout) :: e_val(n) - double precision, intent(in) :: delta - double precision, intent(in) :: tmp_wtg(n) - - ! out - double precision, intent(out) :: lambda - - ! Internal - double precision :: d1_N, d2_N, f_N, prev_f_N - double precision :: prev_f_R, f_R - double precision :: model - double precision :: d_1, d_2 - double precision :: t1,t2,t3 - integer :: i - double precision :: epsilon - double precision :: y - double precision :: prev_lambda - double precision :: rho_2 - double precision :: alpha - integer :: version - - ! Functions - double precision :: d1_norm_trust_region,d1_norm_trust_region_omp - double precision :: d2_norm_trust_region, d2_norm_trust_region_omp - double precision :: f_norm_trust_region, f_norm_trust_region_omp - double precision :: d1_norm_inverse_trust_region - double precision :: d2_norm_inverse_trust_region - double precision :: d1_norm_inverse_trust_region_omp - double precision :: d2_norm_inverse_trust_region_omp - - print*,'' - print*,'---Trust_newton---' - print*,'' - - call wall_time(t1) - - ! version_lambda_search - ! 1 -> ||x||^2 - delta^2 = 0, - ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif - ! Version 2 is normally better - - - -! Resolution with the Newton method: - - -! Initialization - epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) - - ! Pre research of lambda to start near the optimal lambda - ! by adding a constant epsilon and changing the constant to - ! have ||x(lambda + epsilon)|| ~ delta, before setting - ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta - i = 1 - - ! To increase lambda - if (f_N > delta**2) then - print*,'Increasing lambda...' - do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 2d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N < f_N) then - print*,'WARNING, error: prev_f_N < f_N, exit' - epsilon = epsilon * 0.5d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - - ! To reduce lambda - else - print*,'Reducing lambda...' - do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 0.5d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N > f_N) then - print*,'WARNING, error: prev_f_N > f_N, exit' - epsilon = epsilon * 2d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - endif - - print*,'End of the pre research of lambda' - - ! New value of lambda - lambda = lambda + epsilon - - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda - - ! thresh_cc, threshold for the research of the optimal lambda - ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc - ! thresh_rho_2, threshold to cancel the step in the research - ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) - - ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif - - ! Initialization - i = 1 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 - model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 - prev_f_N = 0d0 ! previous value of ||x||^2 - prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 - f_R = 0d0 ! value of (||x||^2 - delta^2)^2 - rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) - y = 0d0 ! step size - prev_lambda = 0d0 ! previous lambda - - ! Derivatives - if (version_lambda_search == 1) then - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - - ! Trust length - alpha = DABS((1d0/d_2)*d_1) - - ! Newton's method - do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' - - ! Update of f_N, f_R and the derivatives - prev_f_N = f_N - if (version_lambda_search == 1) then - prev_f_R = (prev_f_N - delta**2)**2 - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 - - ! Newton's step - y = -(1d0/DABS(d_2))*d_1 - - ! Constraint on y (the newton step) - if (DABS(y) > alpha) then - y = alpha * (y/DABS(y)) ! preservation of the sign of y - endif - write(*,'(a,E12.5)') ' Step length: ', y - - ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series - model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 - - ! Updates lambda - prev_lambda = lambda - lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda - - ! Checks if lambda is in (-h_1, \infty) - if (lambda > MAX(0d0, -e_val(1))) then - ! New value of ||x(lambda)||^2 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - ! New f_R - if (version_lambda_search == 1) then - f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 - else - f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 - endif - - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif - - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model - - ! Check the gain - if (DABS(prev_f_R - model) < thresh_model_2) then - print*,'' - print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' - print*,'' - endif - - ! Will be deleted - !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then - ! print*,'' - ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' - ! print*,'' - ! exit - !endif - - ! Computes rho_2 - rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 - else - rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' - endif - - ! Evolution of the trust length, alpha - if (rho_2 >= 0.75d0) then - alpha = 2d0 * alpha - elseif (rho_2 >= 0.5d0) then - alpha = alpha - elseif (rho_2 >= 0.25d0) then - alpha = 0.5d0 * alpha - else - alpha = 0.25d0 * alpha - endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha - - ! cancellaion of the step if rho < 0.1 - if (rho_2 < thresh_rho_2) then !0.1d0) then - lambda = prev_lambda - f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' - endif - - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' - - i = i + 1 - enddo - - ! if trust newton failed - if (i > nb_it_max_lambda) then - print*,'' - print*,'######################################################' - print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' - print*,'The research of the optimal lambda has failed' - print*,'######################################################' - print*,'' - endif - - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 - - ! Time - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust_newton:', t3 - - print*,'' - print*,'---End trust_newton---' - print*,'' - -end subroutine - -! OMP: First derivative of (||x||^2 - Delta^2)^2 - -! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* - -! This function computes the first derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. - -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | mo_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - - -function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg,accu1,accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) - - deallocate(tmp_accu1, tmp_accu2) - -end function - -! OMP: Second derivative of (||x||^2 - Delta^2)^2 - -! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* - -! This function computes the second derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. -! \begin{align*} -! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | accu3 | double precision | third sum of the formula | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_accu2 | double precision | temporary array for the third sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - - -function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region_omp - double precision :: ddot - - ! Internal - double precision :: accu1,accu2,accu3 - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - integer :: i, j - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - - ! accu1 - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - ! accu2 - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - ! accu3 - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) - - deallocate(tmp_accu1, tmp_accu2, tmp_accu3) - -end function - -! OMP: Function value of ||x||^2 - -! *Compute the value of ||x||^2* - -! This function computes the value of ||x(lambda)||^2 - -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | - -! Internal: -! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | -! | tmp_fN | double precision | temporary array for the function | -! | i,j | integer | indexes | - - -function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - use omp_lib - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! functions - double precision :: f_norm_trust_region_omp - - ! internal - double precision, allocatable :: tmp_fN(:) - integer :: i,j - - ! Allocation - allocate(tmp_fN(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - f_norm_trust_region_omp = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_fN(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - deallocate(tmp_fN) - -end function - -! First derivative of (||x||^2 - Delta^2)^2 -! Version without omp - -! *Function to compute the first derivative of ||x||^2 - Delta* - -! This function computes the first derivative of (||x||^2 - Delta^2)^2 -! with respect to lambda. - -! \begin{align*} -! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -! | ddot | double precision | blas dot product | - - -function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i, j - - ! Functions - double precision :: d1_norm_trust_region - double precision :: ddot - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) - -end function - -! Second derivative of (||x||^2 - Delta^2)^2 -! Version without OMP - -! *Function to compute the second derivative of ||x||^2 - Delta* - - -! \begin{equation} -! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -! \end{equation} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | accu1 | double precision | first sum of the formula | -! | accu2 | double precision | second sum of the formula | -! | accu3 | double precision | third sum of the formula | -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | -! | ddot | double precision | blas dot product | - - -function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region - double precision :: ddot - - ! Internal - double precision :: wtg,accu1,accu2,accu3 - integer :: i, j - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 - endif - enddo - - d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) - -end function - -! Function value of ||x||^2 -! Version without OMP - -! *Compute the value of ||x||^2* - -! This function computes the value of ||x(lambda)||^2 - -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | f_norm_trust_region | double precision | value of norm(x)^2 | -! | ddot | double precision | blas dot product | - - - -function f_norm_trust_region(n,e_val,tmp_wtg,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! function - double precision :: f_norm_trust_region - double precision :: ddot - - ! internal - integer :: i,j - - ! Initialization - f_norm_trust_region = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - -end function - -! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version with OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - - deallocate(tmp_accu1, tmp_accu2) - -end - -! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version with OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | tmp_accu1 | double precision | temporary array for the first sum | -! | tmp_accu2 | double precision | temporary array for the second sum | -! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end - -! First derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} -! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ -! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -! \end{align*} -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -! \end{align*} -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d1_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end - -! Second derivative of (1/||x||^2 - 1/Delta^2)^2 -! Version without OMP - -! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -! \begin{align*} -! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 -! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ -! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} -! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -! \end{align*} - -! \begin{align*} -! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -! \end{align*} - -! Provided: -! | m_num | integer | number of MOs | - -! Input: -! | n | integer | mo_num*(mo_num-1)/2 | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n,n) | double precision | eigenvectors of the hessian | -! | v_grad(n) | double precision | gradient | -! | lambda | double precision | Lagrange multiplier | -! | delta | double precision | Delta of the trust region | - -! Internal: -! | wtg | double precision | temporary variable to store W^T.v_grad | -! | i,j | integer | indexes | - -! Function: -! | d2_norm_inverse_trust_region | double precision | value of the first derivative | - - -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/trust_region_optimal_lambda.org deleted file mode 100644 index b39c9a10..00000000 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ /dev/null @@ -1,1665 +0,0 @@ -* Newton's method to find the optimal lambda - -*Compute the lambda value for the trust region* - -This subroutine uses the Newton method in order to find the optimal -lambda. This constant is added on the diagonal of the hessian to shift -the eiganvalues. It has a double role: -- ensure that the resulting hessian is positive definite for the - Newton method -- constrain the step in the trust region, i.e., - $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius - of the trust region. -We search $\lambda$ which minimizes -\begin{align*} - f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 -\end{align*} -or -\begin{align*} - \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 -\end{align*} -and gives obviously 0 in both cases. \newline - -There are several cases: -- If $\textbf{H}$ is positive definite the interval containing the - solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). -- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$ then the interval containing - the solution is $\lambda \in (-h_1, \infty)$. -- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot - \textbf{g} = 0$ then the interval containing the solution is - $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < - 10^{-12}$ are not computed, so the term where $i = 1$ is - automatically removed and this case becomes similar to the previous one. - -So to avoid numerical problems (cf. trust_region) we start the -algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, -with $\epsilon$ a little constant. -The research must be restricted to the interval containing the -solution. For that reason a little trust region in 1D is used. - -The Newton method to find the optimal $\lambda$ is : -\begin{align*} - \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ -\end{align*} -$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to -$\lambda$ at the l-th iteration, -$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to -$\lambda$ at the l-th iteration.\newline - -Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} -f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as -\begin{align*} - y \leq \alpha -\end{align*} -with $\alpha$ a scalar representing the trust length (trust region in -1D) where the function $f$ or $\tilde{f}$ is correctly describe by the -Taylor series truncated at the second order. Thus, if $y > \alpha$, -the constraint is applied as -\begin{align*} - y^* = \alpha \frac{y}{|y|} -\end{align*} -with $y^*$ the solution in the trust region. - -The size of the trust region evolves in function of $\rho$ as for the -trust region seen previously cf. trust_region, rho_model. -The prediction of the value of $f$ or $\tilde{f}$ is done using the -Taylor series truncated at the second order cf. "trust_region", -"trust_e_model". - -The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 - = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) - \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} -\begin{align*} -\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - -1/\Delta^2)^2$ with respect to $\lambda$ are: -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} - {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} - {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -Provided in qp_edit: -| thresh_rho_2 | -| thresh_cc | -| nb_it_max_lambda | -| version_lambda_search | -| nb_it_max_pre_search | -see qp_edit for more details - -Input: -| n | integer | m*(m-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | -| delta | double precision | delta for the trust region | - -Output: -| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | -| | | lambda > 0 | - -Internal: -| d1_N | double precision | value of d1_norm_trust_region | -| d2_N | double precision | value of d2_norm_trust_region | -| f_N | double precision | value of f_norm_trust_region | -| prev_f_N | double precision | previous value of f_norm_trust_region | -| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | -| prev_f_R | double precision | previous value of f_R | -| model | double precision | predicted value of f_R from prev_f_R and y | -| d_1 | double precision | value of the first derivative | -| d_2 | double precision | value of the second derivative | -| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | -| prev_lambda | double precision | previous value of lambda | -| t1,t2,t3 | double precision | wall time | -| i | integer | index | -| epsilon | double precision | little constant to avoid numerical problem | -| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | -| version | integer | version of the root finding method | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | -| f_norm_trust_region | double precision | value of norm(x)^2 | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Research the optimal lambda to constrain the step size in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(inout) :: e_val(n) - double precision, intent(in) :: delta - double precision, intent(in) :: tmp_wtg(n) - - ! out - double precision, intent(out) :: lambda - - ! Internal - double precision :: d1_N, d2_N, f_N, prev_f_N - double precision :: prev_f_R, f_R - double precision :: model - double precision :: d_1, d_2 - double precision :: t1,t2,t3 - integer :: i - double precision :: epsilon - double precision :: y - double precision :: prev_lambda - double precision :: rho_2 - double precision :: alpha - integer :: version - - ! Functions - double precision :: d1_norm_trust_region,d1_norm_trust_region_omp - double precision :: d2_norm_trust_region, d2_norm_trust_region_omp - double precision :: f_norm_trust_region, f_norm_trust_region_omp - double precision :: d1_norm_inverse_trust_region - double precision :: d2_norm_inverse_trust_region - double precision :: d1_norm_inverse_trust_region_omp - double precision :: d2_norm_inverse_trust_region_omp - - print*,'' - print*,'---Trust_newton---' - print*,'' - - call wall_time(t1) - - ! version_lambda_search - ! 1 -> ||x||^2 - delta^2 = 0, - ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif - ! Version 2 is normally better -#+END_SRC - -Resolution with the Newton method: - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f - ! Initialization - epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) - - ! Pre research of lambda to start near the optimal lambda - ! by adding a constant epsilon and changing the constant to - ! have ||x(lambda + epsilon)|| ~ delta, before setting - ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta - i = 1 - - ! To increase lambda - if (f_N > delta**2) then - print*,'Increasing lambda...' - do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 2d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N < f_N) then - print*,'WARNING, error: prev_f_N < f_N, exit' - epsilon = epsilon * 0.5d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - - ! To reduce lambda - else - print*,'Reducing lambda...' - do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) - - ! Update the previous norm - prev_f_N = f_N - ! New epsilon - epsilon = epsilon * 0.5d0 - ! New norm - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta - - ! Security - if (prev_f_N > f_N) then - print*,'WARNING, error: prev_f_N > f_N, exit' - epsilon = epsilon * 2d0 - i = nb_it_max_pre_search + 1 - endif - - i = i + 1 - enddo - endif - - print*,'End of the pre research of lambda' - - ! New value of lambda - lambda = lambda + epsilon - - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda - - ! thresh_cc, threshold for the research of the optimal lambda - ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc - ! thresh_rho_2, threshold to cancel the step in the research - ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) - - ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif - - ! Initialization - i = 1 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 - model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 - prev_f_N = 0d0 ! previous value of ||x||^2 - prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 - f_R = 0d0 ! value of (||x||^2 - delta^2)^2 - rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) - y = 0d0 ! step size - prev_lambda = 0d0 ! previous lambda - - ! Derivatives - if (version_lambda_search == 1) then - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - - ! Trust length - alpha = DABS((1d0/d_2)*d_1) - - ! Newton's method - do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' - - ! Update of f_N, f_R and the derivatives - prev_f_N = f_N - if (version_lambda_search == 1) then - prev_f_R = (prev_f_N - delta**2)**2 - d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 - d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 - else - prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 - d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 - endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 - - ! Newton's step - y = -(1d0/DABS(d_2))*d_1 - - ! Constraint on y (the newton step) - if (DABS(y) > alpha) then - y = alpha * (y/DABS(y)) ! preservation of the sign of y - endif - write(*,'(a,E12.5)') ' Step length: ', y - - ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series - model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 - - ! Updates lambda - prev_lambda = lambda - lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda - - ! Checks if lambda is in (-h_1, \infty) - if (lambda > MAX(0d0, -e_val(1))) then - ! New value of ||x(lambda)||^2 - f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - ! New f_R - if (version_lambda_search == 1) then - f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 - else - f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 - endif - - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif - - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model - - ! Check the gain - if (DABS(prev_f_R - model) < thresh_model_2) then - print*,'' - print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' - print*,'' - endif - - ! Will be deleted - !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then - ! print*,'' - ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' - ! print*,'' - ! exit - !endif - - ! Computes rho_2 - rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 - else - rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' - endif - - ! Evolution of the trust length, alpha - if (rho_2 >= 0.75d0) then - alpha = 2d0 * alpha - elseif (rho_2 >= 0.5d0) then - alpha = alpha - elseif (rho_2 >= 0.25d0) then - alpha = 0.5d0 * alpha - else - alpha = 0.25d0 * alpha - endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha - - ! cancellaion of the step if rho < 0.1 - if (rho_2 < thresh_rho_2) then !0.1d0) then - lambda = prev_lambda - f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' - endif - - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' - - i = i + 1 - enddo - - ! if trust newton failed - if (i > nb_it_max_lambda) then - print*,'' - print*,'######################################################' - print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' - print*,'The research of the optimal lambda has failed' - print*,'######################################################' - print*,'' - endif - - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 - - ! Time - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in trust_newton:', t3 - - print*,'' - print*,'---End trust_newton---' - print*,'' - -end subroutine -#+END_SRC - -* OMP: First derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the first derivative of (||x||^2 - Delta^2)^2* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. - -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| mo_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg,accu1,accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) - - deallocate(tmp_accu1, tmp_accu2) - -end function -#+END_SRC - -* OMP: Second derivative of (||x||^2 - Delta^2)^2 - -*Function to compute the second derivative of (||x||^2 - Delta^2)^2* - -This function computes the second derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. -\begin{align*} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{align*} - -\begin{align*} - \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ - \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ - \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_accu2 | double precision | temporary array for the third sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region_omp - double precision :: ddot - - ! Internal - double precision :: accu1,accu2,accu3 - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - integer :: i, j - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - - ! accu1 - !$OMP DO - do i = 1, n - if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - ! accu2 - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - ! accu3 - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) - - deallocate(tmp_accu1, tmp_accu2, tmp_accu3) - -end function -#+END_SRC - -* OMP: Function value of ||x||^2 - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | - -Internal: -| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | -| tmp_fN | double precision | temporary array for the function | -| i,j | integer | indexes | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - - use omp_lib - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! functions - double precision :: f_norm_trust_region_omp - - ! internal - double precision, allocatable :: tmp_fN(:) - integer :: i,j - - ! Allocation - allocate(tmp_fN(n)) - - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & - !$OMP DEFAULT(NONE) - - ! Initialization - - !$OMP MASTER - f_norm_trust_region_omp = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_fN(i) = 0d0 - enddo - !$OMP END DO - - ! Calculations - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - deallocate(tmp_fN) - -end function -#+END_SRC - -* First derivative of (||x||^2 - Delta^2)^2 -Version without omp - -*Function to compute the first derivative of ||x||^2 - Delta* - -This function computes the first derivative of (||x||^2 - Delta^2)^2 -with respect to lambda. - -\begin{align*} -\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) -\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i, j - - ! Functions - double precision :: d1_norm_trust_region - double precision :: ddot - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - wtg = 0d0 - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) - -end function -#+END_SRC - -* Second derivative of (||x||^2 - Delta^2)^2 -Version without OMP - -*Function to compute the second derivative of ||x||^2 - Delta* - - -\begin{equation} -\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 -= 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] -\end{equation} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| accu1 | double precision | first sum of the formula | -| accu2 | double precision | second sum of the formula | -| accu3 | double precision | third sum of the formula | -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | -| ddot | double precision | blas dot product | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Functions - double precision :: d2_norm_trust_region - double precision :: ddot - - ! Internal - double precision :: wtg,accu1,accu2,accu3 - integer :: i, j - - ! Initialization - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - !wtg = ddot(n,w(:,i),1,v_grad,1) - accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 - endif - enddo - - d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) - -end function -#+END_SRC - -* Function value of ||x||^2 -Version without OMP - -*Compute the value of ||x||^2* - -This function computes the value of ||x(lambda)||^2 - -\begin{align*} -||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| f_norm_trust_region | double precision | value of norm(x)^2 | -| ddot | double precision | blas dot product | - - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function f_norm_trust_region(n,e_val,tmp_wtg,lambda) - - include 'pi.h' - - BEGIN_DOC - ! Compute ||x(lambda)||^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - - ! function - double precision :: f_norm_trust_region - double precision :: ddot - - ! internal - integer :: i,j - - ! Initialization - f_norm_trust_region = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - -end function -#+END_SRC - -* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) - - ! Functions - double precision :: d1_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - -! !$OMP MASTER -! do i = 1, n -! if (ABS(e_val(i)+lambda) > 1d-12) then -! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 -! endif -! enddo -! !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - - deallocate(tmp_accu1, tmp_accu2) - -end -#+END_SRC - -* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version with OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| tmp_accu1 | double precision | temporary array for the first sum | -| tmp_accu2 | double precision | temporary array for the second sum | -| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) - - use omp_lib - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: tmp_wtg(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: accu1, accu2, accu3 - integer :: i,j - double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) - - ! Functions - double precision :: d2_norm_inverse_trust_region_omp - - ! Allocation - allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) - - ! OMP - call omp_set_max_active_levels(1) - - ! OMP - !$OMP PARALLEL & - !$OMP PRIVATE(i,j) & - !$OMP SHARED(n,lambda, e_val, thresh_eig,& - !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & - !$OMP accu1, accu2, accu3) & - !$OMP DEFAULT(NONE) - - !$OMP MASTER - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - !$OMP END MASTER - - !$OMP DO - do i = 1, n - tmp_accu1(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu2(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - tmp_accu3(i) = 0d0 - enddo - !$OMP END DO - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu1 = accu1 + tmp_accu1(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu2 = accu2 + tmp_accu2(i) - enddo - !$OMP END MASTER - - !$OMP DO - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 - endif - enddo - !$OMP END DO - - !$OMP MASTER - do i = 1, n - accu3 = accu3 + tmp_accu3(i) - enddo - !$OMP END MASTER - - !$OMP END PARALLEL - - call omp_set_max_active_levels(4) - - d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - - deallocate(tmp_accu1,tmp_accu2,tmp_accu3) - -end -#+END_SRC - -* First derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} - {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ - &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} - \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) -\end{align*} -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} -\end{align*} -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d1_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2 - integer :: i,j - - ! Functions - double precision :: d1_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) - -end -#+END_SRC - -* Second derivative of (1/||x||^2 - 1/Delta^2)^2 -Version without OMP - -*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* - -This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 - -\begin{align*} - \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 - &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ - &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} - - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] -\end{align*} - -\begin{align*} -\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ -\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ -\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} -\end{align*} - -Provided: -| m_num | integer | number of MOs | - -Input: -| n | integer | mo_num*(mo_num-1)/2 | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n,n) | double precision | eigenvectors of the hessian | -| v_grad(n) | double precision | gradient | -| lambda | double precision | Lagrange multiplier | -| delta | double precision | Delta of the trust region | - -Internal: -| wtg | double precision | temporary variable to store W^T.v_grad | -| i,j | integer | indexes | - -Function: -| d2_norm_inverse_trust_region | double precision | value of the first derivative | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f -function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: e_val(n) - double precision, intent(in) :: w(n,n) - double precision, intent(in) :: v_grad(n) - double precision, intent(in) :: lambda - double precision, intent(in) :: delta - - ! Internal - double precision :: wtg, accu1, accu2, accu3 - integer :: i,j - - ! Functions - double precision :: d2_norm_inverse_trust_region - - accu1 = 0d0 - accu2 = 0d0 - accu3 = 0d0 - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 - endif - enddo - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - wtg = 0d0 - do j = 1, n - wtg = wtg + w(j,i) * v_grad(j) - enddo - accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 - endif - enddo - - d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & - - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) - -end -#+END_SRC diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f deleted file mode 100644 index 45738736..00000000 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ /dev/null @@ -1,121 +0,0 @@ -! Agreement with the model: Rho - -! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -! Rho represents the agreement between the model (the predicted energy -! by the Taylor expansion truncated at the 2nd order) and the real -! energy : - -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} -! With : -! $E^{k}$ the energy at the previous iteration -! $E^{k+1}$ the energy at the actual iteration -! $m^{k+1}$ the predicted energy for the actual iteration -! (cf. trust_e_model) - -! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -! If $\rho \leq 0$ the previous energy is lower than the actual -! energy. We have to cancel the last step and use a smaller trust -! region. -! Here we cancel the last step if $\rho < 0.1$, because even if -! the energy decreases, the agreement is bad, i.e., the Taylor expansion -! truncated at the second order doesn't represent correctly the energy -! landscape. So it's better to cancel the step and restart with a -! smaller trust region. - -! Provided in qp_edit: -! | thresh_rho | - -! Input: -! | prev_energy | double precision | previous energy (energy before the rotation) | -! | e_model | double precision | predicted energy after the rotation | - -! Output: -! | rho | double precision | the agreement between the model (predicted) and the real energy | -! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -! | | | else the previous energy doesn't change | - -! Internal: -! | energy | double precision | energy (real) after the rotation | -! | i | integer | index | -! | t* | double precision | time | - - -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) - -! Rho -! \begin{equation} -! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -! \end{equation} - -! In function of $\rho$ th step can be accepted or cancelled. - -! If we cancel the last step (k+1), the previous energy (k) doesn't -! change! -! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - - -! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' - !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/trust_region_rho.org deleted file mode 100644 index 9b25ee29..00000000 --- a/src/utils_trust_region/trust_region_rho.org +++ /dev/null @@ -1,123 +0,0 @@ -* Agreement with the model: Rho - -*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* - -Rho represents the agreement between the model (the predicted energy -by the Taylor expansion truncated at the 2nd order) and the real -energy : - -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} -With : -$E^{k}$ the energy at the previous iteration -$E^{k+1}$ the energy at the actual iteration -$m^{k+1}$ the predicted energy for the actual iteration -(cf. trust_e_model) - -If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. -If $\rho \leq 0$ the previous energy is lower than the actual -energy. We have to cancel the last step and use a smaller trust -region. -Here we cancel the last step if $\rho < 0.1$, because even if -the energy decreases, the agreement is bad, i.e., the Taylor expansion -truncated at the second order doesn't represent correctly the energy -landscape. So it's better to cancel the step and restart with a -smaller trust region. - -Provided in qp_edit: -| thresh_rho | - -Input: -| prev_energy | double precision | previous energy (energy before the rotation) | -| e_model | double precision | predicted energy after the rotation | - -Output: -| rho | double precision | the agreement between the model (predicted) and the real energy | -| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | -| | | else the previous energy doesn't change | - -Internal: -| energy | double precision | energy (real) after the rotation | -| i | integer | index | -| t* | double precision | time | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f -subroutine trust_region_rho(prev_energy, energy,e_model,rho) - - include 'pi.h' - - BEGIN_DOC - ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC - - implicit none - - ! Variables - - ! In - double precision, intent(inout) :: prev_energy - double precision, intent(in) :: e_model, energy - - ! Out - double precision, intent(out) :: rho - - ! Internal - double precision :: t1, t2, t3 - integer :: i - - print*,'' - print*,'---Rho_model---' - - call wall_time(t1) -#+END_SRC - -** Rho -\begin{equation} -\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} -\end{equation} - -In function of $\rho$ th step can be accepted or cancelled. - -If we cancel the last step (k+1), the previous energy (k) doesn't -change! -If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f - ! Already done in an other subroutine - !if (ABS(prev_energy - e_model) < 1d-12) then - ! print*,'WARNING: prev_energy - e_model < 1d-12' - ! print*,'=> rho will tend toward infinity' - ! print*,'Check you convergence criterion !' - !endif - - rho = (prev_energy - energy) / (prev_energy - e_model) - - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho - - ! Modification of prev_energy in function of rho - if (rho < thresh_rho) then !0.1) then - ! the step is cancelled - print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy - else - ! the step is accepted - prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy - endif - - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 - - print*,'---End rho_model---' - print*,'' - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f deleted file mode 100644 index 42aa6ed4..00000000 --- a/src/utils_trust_region/trust_region_step.irp.f +++ /dev/null @@ -1,716 +0,0 @@ -! Trust region - -! *Compute the next step with the trust region algorithm* - -! The Newton method is an iterative method to find a minimum of a given -! function. It uses a Taylor series truncated at the second order of the -! targeted function and gives its minimizer. The minimizer is taken as -! the new position and the same thing is done. And by doing so -! iteratively the method find a minimum, a local or global one depending -! of the starting point and the convexity/nonconvexity of the targeted -! function. - -! The goal of the trust region is to constrain the step size of the -! Newton method in a certain area around the actual position, where the -! Taylor series is a good approximation of the targeted function. This -! area is called the "trust region". - -! In addition, in function of the agreement between the Taylor -! development of the energy and the real energy, the size of the trust -! region will be updated at each iteration. By doing so, the step sizes -! are not too larges. In addition, since we add a criterion to cancel the -! step if the energy increases (more precisely if rho < 0.1), so it's -! impossible to diverge. \newline - -! References: \newline -! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline -! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline -! ISBN: 978-0-387-40065-5 \newline - -! By using the first and the second derivatives, the Newton method gives -! a step: -! \begin{align*} -! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot -! \textbf{g}_{(k)} -! \end{align*} -! which leads to the minimizer of the Taylor series. -! !!! Warning: the Newton method gives the minimizer if and only if -! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! -! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: -! \begin{align*} -! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} -! \end{align*} -! which is equivalent to -! \begin{align*} -! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 -! \end{align*} - -! with: \newline -! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of -! size n) \newline -! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n -! matrix) \newline -! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of -! size n) \newline -! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration -! \newline - -! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a -! hypersphere of radius $\Delta_{(k+1)}$.\newline - -! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and -! $\textbf{H}$ is positive definite, the -! solution is the step given by the Newton method -! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. -! Else we have to constrain the step size. For simplicity we will remove -! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have -! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. -! Starting from the Taylor series of a function E (here, the energy) -! truncated at the 2nd order, we have: -! \begin{align*} -! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} -! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + -! \mathcal{O}(\textbf{x}^2) -! \end{align*} - -! With the constraint on the norm of $\textbf{x}$ we can write the -! Lagrangian -! \begin{align*} -! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} -! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} -! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) -! \end{align*} -! Where: \newline -! $\lambda$ is the Lagrange multiplier \newline -! $E$ is the energy at the k-th iteration $\Leftrightarrow -! E(\textbf{x} = \textbf{0})$ \newline - -! To solve this equation, we search a stationary point where the first -! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 -! \end{align*} - -! The derivative is: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! \end{align*} - -! So, we search $\textbf{x}$ such as: -! \begin{align*} -! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} -! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 -! \end{align*} - -! We can rewrite that as: -! \begin{align*} -! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} -! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 -! \end{align*} -! with $\textbf{I}$ is the identity matrix. - -! By doing so, the solution is: -! \begin{align*} -! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} -! \end{align*} -! \begin{align*} -! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} -! with $\textbf{x}^T \textbf{x} = \Delta^2$. - -! We have to solve this previous equation to find this $\textbf{x}$ in the -! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is -! just a one dimension problem because we can express $\textbf{x}$ as a -! function of $\lambda$: -! \begin{align*} -! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} -! \end{align*} - -! We start from the fact that the hessian is diagonalizable. So we have: -! \begin{align*} -! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T -! \end{align*} -! with: \newline -! $\textbf{H}$, the hessian matrix \newline -! $\textbf{W}$, the matrix containing the eigenvectors \newline -! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline -! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline -! $h_i$, the i-th eigenvalue in ascending order \newline - -! Now we use the fact that adding a constant on the diagonal just shifts -! the eigenvalues: -! \begin{align*} -! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} -! +\textbf{I} \lambda) \cdot \textbf{W}^T -! \end{align*} - -! By doing so we can express $\textbf{x}$ as a function of $\lambda$ -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! with $\lambda \neq - h_i$. - -! An interesting thing in our case is the norm of $\textbf{x}$, -! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of -! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: -! \begin{align*} -! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot -! \textbf{g})^2}{(h_i + \lambda)^2} -! \end{align*} - -! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. -! And if we study the properties of this function we see that: -! \begin{align*} -! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 -! \end{align*} -! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: -! \begin{align*} -! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty -! \end{align*} - -! From these limits and knowing that $h_1$ is the lowest eigenvalue, we -! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and -! strictly decreasing function on the interval $\lambda \in -! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which -! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one -! solution. - -! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot -! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, -! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the -! Newton method is only defined for a positive definite hessian matrix, -! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -! definite. Consequently, in the case where $\textbf{H}$ is not positive -! definite, to ensure the positive definiteness, $\lambda$ must be -! greater than $- h_1$. -! \begin{align*} -! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -! \end{align*} - -! From that there are five cases: -! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot -! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} -! \lambda)$ -! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot -! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing -! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be -! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is -! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = -! 0)|| \leq \Delta$ -! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point - -! Thus to find the solution, we can write: -! \begin{align*} -! ||\textbf{x}(\lambda)|| = \Delta -! \end{align*} -! \begin{align*} -! ||\textbf{x}(\lambda)|| - \Delta = 0 -! \end{align*} - -! Taking the square of this equation -! \begin{align*} -! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -! \end{align*} -! we have a function with one minimum for the optimal $\lambda$. -! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} - -! But in practice, it is more effective to solve: -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} - -! To do that, we just use the Newton method with "trust_newton" using -! first and second derivative of $(||\textbf{x}(\lambda)||^2 - -! \Delta^2)^2$ with respect to $\textbf{x}$. -! This will give the optimal $\lambda$ to compute the -! solution $\textbf{x}$ with the formula seen previously: -! \begin{align*} -! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} - -! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - - - - -! Evolution of the trust region - -! We initialize the trust region at the first iteration using a radius -! \begin{align*} -! \Delta = ||\textbf{x}(\lambda=0)|| -! \end{align*} - -! And for the next iteration the trust region will evolves depending of -! the agreement of the energy prediction based on the Taylor series -! truncated at the 2nd order and the real energy. If the Taylor series -! truncated at the 2nd order represents correctly the energy landscape -! the trust region will be extent else it will be reduced. In order to -! mesure this agreement we use the ratio rho cf. "rho_model" and -! "trust_e_model". From that we use the following values: -! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -! In addition, if $\rho < 0.1$ the iteration is cancelled, so it -! restarts with a smaller trust region until the energy decreases. - - - - -! Summary - -! To summarize, knowing the hessian (eigenvectors and eigenvalues), the -! gradient and the radius of the trust region we can compute the norm of -! the Newton step -! \begin{align*} -! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n -! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -! \end{align*} - -! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and -! $\textbf{x}(\lambda=0)$ is in the trust region and it is not -! necessary to put a constraint on $\textbf{x}$, the solution is the -! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in -! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = -! -h_1)$, similarly to the previous case. -! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ -! time a constant to ensure the condition $||\textbf{x}(\lambda = -! -h_1)|| = \Delta$ and escape from the saddle point -! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we -! have to search $\lambda \in (-h_1, \infty)$ such as -! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method -! \begin{align*} -! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -! \end{align*} -! or -! \begin{align*} -! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -! \end{align*} -! which is numerically more stable. And finally compute -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot -! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -! \end{align*} -! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we -! do exactly the same thing that the previous case but we search -! $\lambda \in (0, \infty)$ -! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and -! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the -! sum), again we do exactly the same thing that the previous case -! searching $\lambda \in (-h_1, \infty)$. - - -! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -! necessary in fact to remove the $j = 1$ in the sum since the term -! where $h_i - \lambda < 10^{-6}$ are not computed. - -! After that, we take this vector $\textbf{x}^*$, called "x", and we do -! the transformation to an antisymmetric matrix $\textbf{X}$, called -! m_x. This matrix $\textbf{X}$ will be used to compute a rotation -! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -! NB: -! An improvement can be done using a elleptical trust region. - - - - -! Code - -! Provided: -! | mo_num | integer | number of MOs | - -! Cf. qp_edit in orbital optimization section, for some constants/thresholds - -! Input: -! | m | integer | number of MOs | -! | n | integer | m*(m-1)/2 | -! | H(n, n) | double precision | hessian | -! | v_grad(n) | double precision | gradient | -! | e_val(n) | double precision | eigenvalues of the hessian | -! | W(n, n) | double precision | eigenvectors of the hessian | -! | rho | double precision | agreement between the model and the reality, | -! | | | represents the quality of the energy prediction | -! | nb_iter | integer | number of iteration | - -! Input/Ouput: -! | delta | double precision | radius of the trust region | - -! Output: -! | x(n) | double precision | vector containing the step | - -! Internal: -! | accu | double precision | temporary variable to compute the step | -! | lambda | double precision | lagrange multiplier | -! | trust_radius2 | double precision | square of the radius of the trust region | -! | norm2_x | double precision | norm^2 of the vector x | -! | norm2_g | double precision | norm^2 of the vector containing the gradient | -! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -! | i, j, k | integer | indexes | - -! Function: -! | dnrm2 | double precision | Blas function computing the norm | -! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - - -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) - -! Initialization and norm - -! The norm of the step size will be useful for the trust region -! algorithm. We start from a first guess and the radius of the trust -! region will evolve during the optimization. - -! avoid_saddle is actually a test to avoid saddle points - - -! Initialization of the Lagrange multiplier -lambda = 0d0 - -! List of w^T.g, to avoid the recomputation -tmp_wtg = 0d0 -do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo -enddo - -! Replacement of the small tmp_wtg corresponding to a negative eigenvalue -! in the case of avoid_saddle -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - -endif - -! Norm^2 of x, ||x||^2 -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) -! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta -! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm -! Anyway if the step is too big it will be reduced -print*,'||x||^2 :', norm2_x - -! Norm^2 of the gradient, ||v_grad||^2 -norm2_g = (dnrm2(n,v_grad,1))**2 -print*,'||grad||^2 :', norm2_g - -! Trust radius initialization - -! At the first iteration (nb_iter = 0) we initialize the trust region -! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = -! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, -! we compute this norm using f_norm_trust_region_omp as explain just -! below) - - -! trust radius -if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) -endif - -! Modification of the trust radius - -! In function of rho (which represents the agreement between the model -! and the reality, cf. rho_model) the trust region evolves. We update -! delta (the radius of the trust region). - -! To avoid too big trust region we put a maximum size. - - -! Modification of the trust radius in function of rho -if (rho >= 0.75d0) then - delta = 2d0 * delta -elseif (rho >= 0.5d0) then - delta = delta -elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta -else - delta = 0.25d0 * delta -endif - -! Maximum size of the trust region -!if (delta > 0.5d0 * n * pi) then -! delta = 0.5d0 * n * pi -! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' -!endif - -if (delta > 1d10) then - delta = 1d10 -endif - -print*, 'Delta :', delta - -! Calculation of the optimal lambda - -! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant -! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the -! unconstrained one, $\lambda = 0$ - -! You will find more details at the beginning - - -! By giving delta, we search (||x||^2 - delta^2)^2 = 0 -! and not (||x||^2 - delta)^2 = 0 - -! Research of lambda to solve ||x(lambda)|| = Delta - -! Display -print*, 'e_val(1) = ', e_val(1) -print*, 'w_1^T.g =', tmp_wtg(1) - -! H positive definite -if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -! H indefinite -else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - -endif - -! Recomputation of the norm^2 of the step x -norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) -print*,'' -print*,'Summary after the trust region:' -print*,'lambda:', lambda -print*,'||x||:', dsqrt(norm2_x) -print*,'delta:', delta - -! Calculation of the step x - -! x refers to $\textbf{x}^*$ -! We compute x in function of lambda using its formula : -! \begin{align*} -! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -! + \lambda} \cdot \textbf{w}_i -! \end{align*} - - -! Initialisation -x = 0d0 - -! Calculation of the step x - -! Normal version -if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - -! Version to use the absolute value of the eigenvalues -else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - -endif - -double precision :: beta, norm_x - -! Test -! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) -! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first -! eigenvectors multiply by a constant to ensure the condition -! ||x(lambda=-e_val(1))|| = delta and escape the saddle point -if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif -endif - -! Transformation of x - -! x is a vector of size n, so it can be write as a m by m -! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i 0$ ($\lambda = 0$ is the unconstraint solution). But the -Newton method is only defined for a positive definite hessian matrix, -so $(\textbf{H} + \textbf{I} \lambda)$ must be positive -definite. Consequently, in the case where $\textbf{H}$ is not positive -definite, to ensure the positive definiteness, $\lambda$ must be -greater than $- h_1$. -\begin{align*} - \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 -\end{align*} - -From that there are five cases: -- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ -- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot - \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} - \lambda)$ - must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) -- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot - \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing - $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be - positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is - similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = - 0)|| \leq \Delta$ - but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point - -Thus to find the solution, we can write: -\begin{align*} - ||\textbf{x}(\lambda)|| = \Delta -\end{align*} -\begin{align*} - ||\textbf{x}(\lambda)|| - \Delta = 0 -\end{align*} - -Taking the square of this equation -\begin{align*} - (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 -\end{align*} -we have a function with one minimum for the optimal $\lambda$. -Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve -\begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 -\end{align*} - -But in practice, it is more effective to solve: -\begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 -\end{align*} - -To do that, we just use the Newton method with "trust_newton" using -first and second derivative of $(||\textbf{x}(\lambda)||^2 - -\Delta^2)^2$ with respect to $\textbf{x}$. -This will give the optimal $\lambda$ to compute the -solution $\textbf{x}$ with the formula seen previously: -\begin{align*} - \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i -\end{align*} - -The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our -step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Evolution of the trust region - -We initialize the trust region at the first iteration using a radius -\begin{align*} - \Delta = ||\textbf{x}(\lambda=0)|| -\end{align*} - -And for the next iteration the trust region will evolves depending of -the agreement of the energy prediction based on the Taylor series -truncated at the 2nd order and the real energy. If the Taylor series -truncated at the 2nd order represents correctly the energy landscape -the trust region will be extent else it will be reduced. In order to -mesure this agreement we use the ratio rho cf. "rho_model" and -"trust_e_model". From that we use the following values: -- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, -- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, -- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, -- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. - -In addition, if $\rho < 0.1$ the iteration is cancelled, so it -restarts with a smaller trust region until the energy decreases. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Summary - -To summarize, knowing the hessian (eigenvectors and eigenvalues), the -gradient and the radius of the trust region we can compute the norm of -the Newton step -\begin{align*} - ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n - \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 -\end{align*} - -- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and - $\textbf{x}(\lambda=0)$ is in the trust region and it is not - necessary to put a constraint on $\textbf{x}$, the solution is the - unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. -- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in - the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = - -h_1)$, similarly to the previous case. - But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ - time a constant to ensure the condition $||\textbf{x}(\lambda = - -h_1)|| = \Delta$ and escape from the saddle point -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we - have to search $\lambda \in (-h_1, \infty)$ such as - $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method - \begin{align*} - (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 - \end{align*} - or - \begin{align*} - (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 - \end{align*} - which is numerically more stable. And finally compute - \begin{align*} - \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot - \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i - \end{align*} -- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we - do exactly the same thing that the previous case but we search - $\lambda \in (0, \infty)$ -- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and - $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the - sum), again we do exactly the same thing that the previous case - searching $\lambda \in (-h_1, \infty)$. - - -For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not -necessary in fact to remove the $j = 1$ in the sum since the term -where $h_i - \lambda < 10^{-6}$ are not computed. - -After that, we take this vector $\textbf{x}^*$, called "x", and we do -the transformation to an antisymmetric matrix $\textbf{X}$, called -m_x. This matrix $\textbf{X}$ will be used to compute a rotation -matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". - -NB: -An improvement can be done using a elleptical trust region. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -#+END_SRC - -** Code - -Provided: -| mo_num | integer | number of MOs | - -Cf. qp_edit in orbital optimization section, for some constants/thresholds - -Input: -| m | integer | number of MOs | -| n | integer | m*(m-1)/2 | -| H(n, n) | double precision | hessian | -| v_grad(n) | double precision | gradient | -| e_val(n) | double precision | eigenvalues of the hessian | -| W(n, n) | double precision | eigenvectors of the hessian | -| rho | double precision | agreement between the model and the reality, | -| | | represents the quality of the energy prediction | -| nb_iter | integer | number of iteration | - -Input/Ouput: -| delta | double precision | radius of the trust region | - -Output: -| x(n) | double precision | vector containing the step | - -Internal: -| accu | double precision | temporary variable to compute the step | -| lambda | double precision | lagrange multiplier | -| trust_radius2 | double precision | square of the radius of the trust region | -| norm2_x | double precision | norm^2 of the vector x | -| norm2_g | double precision | norm^2 of the vector containing the gradient | -| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | -| i, j, k | integer | indexes | - -Function: -| dnrm2 | double precision | Blas function computing the norm | -| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) - - include 'pi.h' - - BEGIN_DOC - ! Compuet the step in the trust region - END_DOC - - implicit none - - ! Variables - - ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n), rho - integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) - - ! inout - double precision, intent(inout) :: delta - - ! out - double precision, intent(out) :: x(n) - - ! Internal - double precision :: accu, lambda, trust_radius2 - double precision :: norm2_x, norm2_g - double precision, allocatable :: tmp_wtg(:) - integer :: i,j,k - double precision :: t1,t2,t3 - integer :: n_neg_eval - - - ! Functions - double precision :: ddot, dnrm2 - double precision :: f_norm_trust_region_omp - - print*,'' - print*,'==================' - print*,'---Trust_region---' - print*,'==================' - - call wall_time(t1) - - ! Allocation - allocate(tmp_wtg(n)) -#+END_SRC - - -*** Initialization and norm - -The norm of the step size will be useful for the trust region -algorithm. We start from a first guess and the radius of the trust -region will evolve during the optimization. - -avoid_saddle is actually a test to avoid saddle points - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialization of the Lagrange multiplier - lambda = 0d0 - - ! List of w^T.g, to avoid the recomputation - tmp_wtg = 0d0 - do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) - enddo - enddo - - ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue - ! in the case of avoid_saddle - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - i = 2 - ! Number of negative eigenvalues - do while (e_val(i) < - thresh_eig) - if (tmp_wtg(i) < thresh_wtg2) then - if (version_avoid_saddle == 1) then - tmp_wtg(i) = 1d0 - elseif (version_avoid_saddle == 2) then - tmp_wtg(i) = DABS(e_val(i)) - elseif (version_avoid_saddle == 3) then - tmp_wtg(i) = dsqrt(DABS(e_val(i))) - else - tmp_wtg(i) = thresh_wtg2 - endif - endif - i = i + 1 - enddo - - ! For the fist one it's a little bit different - if (tmp_wtg(1) < thresh_wtg2) then - tmp_wtg(1) = 0d0 - endif - - endif - - ! Norm^2 of x, ||x||^2 - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta - ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm - ! Anyway if the step is too big it will be reduced - print*,'||x||^2 :', norm2_x - - ! Norm^2 of the gradient, ||v_grad||^2 - norm2_g = (dnrm2(n,v_grad,1))**2 - print*,'||grad||^2 :', norm2_g -#+END_SRC - -*** Trust radius initialization - - At the first iteration (nb_iter = 0) we initialize the trust region - with the norm of the step generate by the Newton's method ($\textbf{x}_1 = - (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, - we compute this norm using f_norm_trust_region_omp as explain just - below) - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! trust radius - if (nb_iter == 0) then - trust_radius2 = norm2_x - ! To avoid infinite loop of cancellation of this first step - ! without changing delta - nb_iter = 1 - - ! Compute delta, delta = sqrt(trust_radius) - delta = dsqrt(trust_radius2) - endif -#+END_SRC - -*** Modification of the trust radius - -In function of rho (which represents the agreement between the model -and the reality, cf. rho_model) the trust region evolves. We update -delta (the radius of the trust region). - -To avoid too big trust region we put a maximum size. - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Modification of the trust radius in function of rho - if (rho >= 0.75d0) then - delta = 2d0 * delta - elseif (rho >= 0.5d0) then - delta = delta - elseif (rho >= 0.25d0) then - delta = 0.5d0 * delta - else - delta = 0.25d0 * delta - endif - - ! Maximum size of the trust region - !if (delta > 0.5d0 * n * pi) then - ! delta = 0.5d0 * n * pi - ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' - !endif - - if (delta > 1d10) then - delta = 1d10 - endif - - print*, 'Delta :', delta -#+END_SRC - -*** Calculation of the optimal lambda - -We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ -- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant - $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ -- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the - unconstrained one, $\lambda = 0$ - -You will find more details at the beginning - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 - ! and not (||x||^2 - delta)^2 = 0 - - ! Research of lambda to solve ||x(lambda)|| = Delta - - ! Display - print*, 'e_val(1) = ', e_val(1) - print*, 'w_1^T.g =', tmp_wtg(1) - - ! H positive definite - if (e_val(1) > - thresh_eig) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta - - ! H positive definite, ||x(lambda = 0)|| <= Delta - if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' - lambda = 0d0 - - ! H positive definite, ||x(lambda = 0)|| > Delta - else - ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - ! H indefinite - else - if (DABS(tmp_wtg(1)) < thresh_wtg) then - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) - endif - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta - if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then - ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' - lambda = - e_val(1) - - ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta - ! and - ! H indefinite, w_1^T.g =/= 0 - else - ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' - call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) - endif - - endif - - ! Recomputation of the norm^2 of the step x - norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) - print*,'' - print*,'Summary after the trust region:' - print*,'lambda:', lambda - print*,'||x||:', dsqrt(norm2_x) - print*,'delta:', delta -#+END_SRC - -*** Calculation of the step x - -x refers to $\textbf{x}^*$ -We compute x in function of lambda using its formula : -\begin{align*} -\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i -+ \lambda} \cdot \textbf{w}_i -\end{align*} - -#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f - ! Initialisation - x = 0d0 - - ! Calculation of the step x - - ! Normal version - if (.not. absolute_eig) then - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo - - ! Version to use the absolute value of the eigenvalues - else - - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo - - endif - - double precision :: beta, norm_x - - ! Test - ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) - ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first - ! eigenvectors multiply by a constant to ensure the condition - ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point - if (avoid_saddle .and. e_val(1) < - thresh_eig) then - if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then - - ! norm of x - norm_x = dnrm2(n,x,1) - - ! Computes the coefficient for the w_1 - beta = delta**2 - norm_x**2 - - ! Updates the step x - x = x + W(:,1) * dsqrt(beta) - - ! Recomputes the norm to check - norm_x = dnrm2(n,x,1) - - print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' - print*, '||x||', norm_x - endif - endif -#+END_SRC - -*** Transformation of x - -x is a vector of size n, so it can be write as a m by m -antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". - - #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -! ! Step transformation vector -> matrix -! ! Vector with n element -> mo_num by mo_num matrix -! do j = 1, m -! do i = 1, m -! if (i>j) then -! call mat_to_vec_index(i,j,k) -! m_x(i,j) = x(k) -! else -! m_x(i,j) = 0d0 -! endif -! enddo -! enddo -! -! ! Antisymmetrization of the previous matrix -! do j = 1, m -! do i = 1, m -! if (i lower diagonal matrix (p,q), p > q - -! If a matrix is antisymmetric it can be reshaped as a vector. And the -! vector can be reshaped as an antisymmetric matrix - -! \begin{align*} -! \begin{pmatrix} -! 0 & -1 & -2 & -4 \\ -! 1 & 0 & -3 & -5 \\ -! 2 & 3 & 0 & -6 \\ -! 4 & 5 & 6 & 0 -! \end{pmatrix} -! \Leftrightarrow -! \begin{pmatrix} -! 1 & 2 & 3 & 4 & 5 & 6 -! \end{pmatrix} -! \end{align*} - -! !!! Here the algorithm only work for the lower diagonal !!! - -! Input: -! | i | integer | index in the vector | - -! Ouput: -! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | -! | | | p > q, | -! | | | p -> row, | -! | | | q -> column | - - -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/vec_to_mat_index.org deleted file mode 100644 index 0a09fa86..00000000 --- a/src/utils_trust_region/vec_to_mat_index.org +++ /dev/null @@ -1,72 +0,0 @@ -* Vector to matrix indexes - -*Compute the indexes p,q of a matrix element with the vector index i* - -Vector (i) -> lower diagonal matrix (p,q), p > q - -If a matrix is antisymmetric it can be reshaped as a vector. And the -vector can be reshaped as an antisymmetric matrix - -\begin{align*} -\begin{pmatrix} -0 & -1 & -2 & -4 \\ -1 & 0 & -3 & -5 \\ -2 & 3 & 0 & -6 \\ -4 & 5 & 6 & 0 -\end{pmatrix} -\Leftrightarrow -\begin{pmatrix} -1 & 2 & 3 & 4 & 5 & 6 -\end{pmatrix} -\end{align*} - -!!! Here the algorithm only work for the lower diagonal !!! - -Input: -| i | integer | index in the vector | - -Ouput: -| p,q | integer | corresponding indexes in the lower diagonal of a matrix | -| | | p > q, | -| | | p -> row, | -| | | q -> column | - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f -subroutine vec_to_mat_index(i,p,q) - - include 'pi.h' - - BEGIN_DOC - ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing - ! its index i a vector - END_DOC - - implicit none - - ! Variables - - ! in - integer,intent(in) :: i - - ! out - integer, intent(out) :: p,q - - ! internal - integer :: a,b - double precision :: da - - da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) - a = INT(da) - if ((a*(a-1))/2==i) then - p = a-1 - else - p = a - endif - b = p*(p-1)/2 - - ! Matrix element indexes - p = p + 1 - q = i - b - -end subroutine -#+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f deleted file mode 100644 index 9140b8d3..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ /dev/null @@ -1,39 +0,0 @@ -! Vect to antisymmetric matrix using mat_to_vec_index - -! Vector to antisymmetric matrix transformation using mat_to_vec_index -! subroutine. - -! Can be done in OMP (for the first part and with omp critical for the second) - - -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/vec_to_mat_v2.org deleted file mode 100644 index 4e358a88..00000000 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ /dev/null @@ -1,40 +0,0 @@ -* Vect to antisymmetric matrix using mat_to_vec_index - -Vector to antisymmetric matrix transformation using mat_to_vec_index -subroutine. - -Can be done in OMP (for the first part and with omp critical for the second) - -#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f -subroutine vec_to_mat_v2(n,m,v_x,m_x) - - BEGIN_DOC - ! Vector to antisymmetric matrix - END_DOC - - implicit none - - integer, intent(in) :: n,m - double precision, intent(in) :: v_x(n) - double precision, intent(out) :: m_x(m,m) - - integer :: i,j,k - - ! 1D -> 2D lower diagonal - m_x = 0d0 - do j = 1, m - 1 - do i = j + 1, m - call mat_to_vec_index(i,j,k) - m_x(i,j) = v_x(k) - enddo - enddo - - ! Antisym - do i = 1, m - 1 - do j = i + 1, m - m_x(i,j) = - m_x(j,i) - enddo - enddo - -end -#+END_SRC From c687569bf488f6c5e93f7c56c2f415a4b22686be Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:01:25 +0200 Subject: [PATCH 041/337] create utils trust region --- src/utils_trust_region/EZFIO.cfg | 89 + src/utils_trust_region/NEED | 1 + src/utils_trust_region/README.md | 11 + src/utils_trust_region/algo_trust.irp.f | 248 +++ .../apply_mo_rotation.irp.f | 85 + src/utils_trust_region/mat_to_vec_index.irp.f | 61 + src/utils_trust_region/org/TANGLE_org_mode.sh | 7 + src/utils_trust_region/org/algo_trust.org | 593 ++++++ .../org/apply_mo_rotation.org | 86 + .../org/mat_to_vec_index.org | 63 + .../org/rotation_matrix.org | 452 +++++ .../org/rotation_matrix_iterative.org | 136 ++ .../org/sub_to_full_rotation_matrix.org | 65 + .../org/trust_region_expected_e.org | 128 ++ .../org/trust_region_optimal_lambda.org | 1661 +++++++++++++++++ .../org/trust_region_rho.org | 122 ++ .../org/trust_region_step.org | 759 ++++++++ .../org/vec_to_mat_index.org | 72 + src/utils_trust_region/org/vec_to_mat_v2.org | 40 + src/utils_trust_region/pi.h | 2 + src/utils_trust_region/rotation_matrix.irp.f | 441 +++++ .../rotation_matrix_iterative.irp.f | 134 ++ .../sub_to_full_rotation_matrix.irp.f | 64 + .../trust_region_expected_e.irp.f | 126 ++ .../trust_region_optimal_lambda.irp.f | 1651 ++++++++++++++++ src/utils_trust_region/trust_region_rho.irp.f | 120 ++ .../trust_region_step.irp.f | 749 ++++++++ src/utils_trust_region/vec_to_mat_index.irp.f | 71 + src/utils_trust_region/vec_to_mat_v2.irp.f | 39 + 29 files changed, 8076 insertions(+) create mode 100644 src/utils_trust_region/EZFIO.cfg create mode 100644 src/utils_trust_region/NEED create mode 100644 src/utils_trust_region/README.md create mode 100644 src/utils_trust_region/algo_trust.irp.f create mode 100644 src/utils_trust_region/apply_mo_rotation.irp.f create mode 100644 src/utils_trust_region/mat_to_vec_index.irp.f create mode 100755 src/utils_trust_region/org/TANGLE_org_mode.sh create mode 100644 src/utils_trust_region/org/algo_trust.org create mode 100644 src/utils_trust_region/org/apply_mo_rotation.org create mode 100644 src/utils_trust_region/org/mat_to_vec_index.org create mode 100644 src/utils_trust_region/org/rotation_matrix.org create mode 100644 src/utils_trust_region/org/rotation_matrix_iterative.org create mode 100644 src/utils_trust_region/org/sub_to_full_rotation_matrix.org create mode 100644 src/utils_trust_region/org/trust_region_expected_e.org create mode 100644 src/utils_trust_region/org/trust_region_optimal_lambda.org create mode 100644 src/utils_trust_region/org/trust_region_rho.org create mode 100644 src/utils_trust_region/org/trust_region_step.org create mode 100644 src/utils_trust_region/org/vec_to_mat_index.org create mode 100644 src/utils_trust_region/org/vec_to_mat_v2.org create mode 100644 src/utils_trust_region/pi.h create mode 100644 src/utils_trust_region/rotation_matrix.irp.f create mode 100644 src/utils_trust_region/rotation_matrix_iterative.irp.f create mode 100644 src/utils_trust_region/sub_to_full_rotation_matrix.irp.f create mode 100644 src/utils_trust_region/trust_region_expected_e.irp.f create mode 100644 src/utils_trust_region/trust_region_optimal_lambda.irp.f create mode 100644 src/utils_trust_region/trust_region_rho.irp.f create mode 100644 src/utils_trust_region/trust_region_step.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_index.irp.f create mode 100644 src/utils_trust_region/vec_to_mat_v2.irp.f diff --git a/src/utils_trust_region/EZFIO.cfg b/src/utils_trust_region/EZFIO.cfg new file mode 100644 index 00000000..9c9f6248 --- /dev/null +++ b/src/utils_trust_region/EZFIO.cfg @@ -0,0 +1,89 @@ +[thresh_delta] +type: double precision +doc: Threshold to stop the optimization if the radius of the trust region delta < thresh_delta +interface: ezfio,provider,ocaml +default: 1.e-10 + +[thresh_rho] +type: double precision +doc: Threshold for the step acceptance in the trust region algorithm, if (rho .geq. thresh_rho) the step is accepted, else the step is cancelled and a smaller step is tried until (rho .geq. thresh_rho) +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_eig] +type: double precision +doc: Threshold to consider when an eigenvalue is 0 in the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[thresh_model] +type: double precision +doc: If if ABS(criterion - criterion_model) < thresh_model, the program exit the trust region algorithm +interface: ezfio,provider,ocaml +default: 1.e-12 + +[absolute_eig] +type: logical +doc: If True, the algorithm replace the eigenvalues of the hessian by their absolute value to compute the step (in the trust region) +interface: ezfio,provider,ocaml +default: false + +[thresh_wtg] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is equal to 0. Must be smaller than thresh_eig by several order of magnitude to avoid numerical problem. If the research of the optimal lambda cannot reach the condition (||x|| .eq. delta) because (||x|| .lt. delta), the reason might be that thresh_wtg is too big or/and thresh_eig is too small +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_wtg2] +type: double precision +doc: Threshold in the trust region algorithm to considere when the dot product of the eigenvector W by the gradient v_grad is 0 in the case of avoid_saddle .eq. true. There is no particular reason to put a different value that thresh_wtg, but it can be useful one day +interface: ezfio,provider,ocaml +default: 1.e-6 + +[avoid_saddle] +type: logical +doc: Test to avoid saddle point, active if true +interface: ezfio,provider,ocaml +default: false + +[version_avoid_saddle] +type: integer +doc: cf. trust region, not stable +interface: ezfio,provider,ocaml +default: 3 + +[thresh_rho_2] +type: double precision +doc: Threshold for the step acceptance for the research of lambda in the trust region algorithm, if (rho_2 .geq. thresh_rho_2) the step is accepted, else the step is rejected +interface: ezfio,provider,ocaml +default: 0.1 + +[thresh_cc] +type: double precision +doc: Threshold to stop the research of the optimal lambda in the trust region algorithm when (dabs(1d0-||x||^2/delta^2) < thresh_cc) +interface: ezfio,provider,ocaml +default: 1.e-6 + +[thresh_model_2] +type: double precision +doc: if (ABS(criterion - criterion_model) < thresh_model_2), i.e., the difference between the actual criterion and the predicted next criterion, during the research of the optimal lambda in the trust region algorithm it prints a warning +interface: ezfio,provider,ocaml +default: 1.e-12 + +[version_lambda_search] +type: integer +doc: Research of the optimal lambda in the trust region algorithm to constrain the norm of the step by solving: 1 -> ||x||^2 - delta^2 .eq. 0, 2 -> 1/||x||^2 - 1/delta^2 .eq. 0 +interface: ezfio,provider,ocaml +default: 2 + +[nb_it_max_lambda] +type: integer +doc: Maximal number of iterations for the research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 100 + +[nb_it_max_pre_search] +type: integer +doc: Maximal number of iterations for the pre-research of the optimal lambda in the trust region algorithm +interface: ezfio,provider,ocaml +default: 40 diff --git a/src/utils_trust_region/NEED b/src/utils_trust_region/NEED new file mode 100644 index 00000000..1a65ce38 --- /dev/null +++ b/src/utils_trust_region/NEED @@ -0,0 +1 @@ +hartree_fock diff --git a/src/utils_trust_region/README.md b/src/utils_trust_region/README.md new file mode 100644 index 00000000..72bfefef --- /dev/null +++ b/src/utils_trust_region/README.md @@ -0,0 +1,11 @@ +# Utils trust region + +The documentation can be found in the org files. + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f new file mode 100644 index 00000000..933d8eff --- /dev/null +++ b/src/utils_trust_region/algo_trust.irp.f @@ -0,0 +1,248 @@ +! Algorithm for the trust region + +! step_in_trust_region: +! Computes the step in the trust region (delta) +! (automatically sets at the iteration 0 and which evolves during the +! process in function of the evolution of rho). The step is computing by +! constraining its norm with a lagrange multiplier. +! Since the calculation of the step is based on the Newton method, an +! estimation of the gain in energy is given using the Taylors series +! truncated at the second order (criterion_model). +! If (DABS(criterion-criterion_model) < 1d-12) then +! must_exit = .True. +! else +! must_exit = .False. + +! This estimation of the gain in energy is used by +! is_step_cancel_trust_region to say if the step is accepted or cancelled. + +! If the step must be cancelled, the calculation restart from the same +! hessian and gradient and recomputes the step but in a smaller trust +! region and so on until the step is accepted. If the step is accepted +! the hessian and the gradient are recomputed to produce a new step. + +! Example: + + +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo + + + +! Variables: + +! Input: +! | n | integer | m*(m-1)/2 | +! | m | integer | number of mo in the mo_class | +! | H(n,n) | double precision | Hessian | +! | v_grad(n) | double precision | Gradient | +! | W(n,n) | double precision | Eigenvectors of the hessian | +! | e_val(n) | double precision | Eigenvalues of the hessian | +! | criterion | double precision | Actual criterion | +! | prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +! | rho | double precision | Given by is_step_cancel_trus_region | +! | | | Agreement between the real function and the Taylor series (2nd order) | +! | nb_iter | integer | Actual number of iterations | + +! Input/output: +! | delta | double precision | Radius of the trust region | + +! Output: +! | criterion_model | double precision | Predicted criterion after the rotation | +! | x(n) | double precision | Step | +! | must_exit | logical | If the program must exit the loop | + + +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine + + + +! Variables: + +! Input: +! | nb_iter | integer | actual number of iterations | +! | prev_criterion | double precision | criterion before the application of the step x | +! | criterion | double precision | criterion after the application of the step x | +! | criterion_model | double precision | predicted criterion after the application of x | + +! Output: +! | rho | double precision | Agreement between the predicted criterion and the real new criterion | +! | cancel_step | logical | If the step must be cancelled | + + +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f new file mode 100644 index 00000000..a313769d --- /dev/null +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -0,0 +1,85 @@ +! Apply MO rotation +! Subroutine to apply the rotation matrix to the coefficients of the +! MOs. + +! New MOs = Old MOs . Rotation matrix + +! *Compute the new MOs with the previous MOs and a rotation matrix* + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +! Intent in: +! | R(mo_num,mo_num) | double precision | rotation matrix | + +! Intent out: +! | prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +! Internal: +! | new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +! | i,j | integer | indexes | + +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine diff --git a/src/utils_trust_region/mat_to_vec_index.irp.f b/src/utils_trust_region/mat_to_vec_index.irp.f new file mode 100644 index 00000000..35e12232 --- /dev/null +++ b/src/utils_trust_region/mat_to_vec_index.irp.f @@ -0,0 +1,61 @@ +! Matrix to vector index + +! *Compute the index i of a vector element from the indexes p,q of a +! matrix element* + +! Lower diagonal matrix (p,q), p > q -> vector (i) + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | p,q | integer | indexes of a matrix element in the lower diagonal | +! | | | p > q, q -> column | +! | | | p -> row, | +! | | | q -> column | + +! Input: +! | i | integer | corresponding index in the vector | + + +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine diff --git a/src/utils_trust_region/org/TANGLE_org_mode.sh b/src/utils_trust_region/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/org/algo_trust.org b/src/utils_trust_region/org/algo_trust.org new file mode 100644 index 00000000..01e99c29 --- /dev/null +++ b/src/utils_trust_region/org/algo_trust.org @@ -0,0 +1,593 @@ +* Algorithm for the trust region + +step_in_trust_region: +Computes the step in the trust region (delta) +(automatically sets at the iteration 0 and which evolves during the +process in function of the evolution of rho). The step is computing by +constraining its norm with a lagrange multiplier. +Since the calculation of the step is based on the Newton method, an +estimation of the gain in energy is given using the Taylors series +truncated at the second order (criterion_model). +If (DABS(criterion-criterion_model) < 1d-12) then + must_exit = .True. +else + must_exit = .False. + +This estimation of the gain in energy is used by +is_step_cancel_trust_region to say if the step is accepted or cancelled. + +If the step must be cancelled, the calculation restart from the same +hessian and gradient and recomputes the step but in a smaller trust +region and so on until the step is accepted. If the step is accepted +the hessian and the gradient are recomputed to produce a new step. + +Example: + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +! !### Initialization ### +! delta = 0d0 +! nb_iter = 0 ! Must start at 0 !!! +! rho = 0.5d0 +! not_converged = .True. +! +! ! ### TODO ### +! ! Compute the criterion before the loop +! call #your_criterion(prev_criterion) +! +! do while (not_converged) +! ! ### TODO ## +! ! Call your gradient +! ! Call you hessian +! call #your_gradient(v_grad) (1D array) +! call #your_hessian(H) (2D array) +! +! ! ### TODO ### +! ! Diagonalization of the hessian +! call diagonalization_hessian(n,H,e_val,w) +! +! cancel_step = .True. ! To enter in the loop just after +! ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho +! do while (cancel_step) +! +! ! Hessian,gradient,Criterion -> x +! call trust_region_step_w_expected_e(tmp_n,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) +! +! if (must_exit) then +! ! ### Message ### +! ! if step_in_trust_region sets must_exit on true for numerical reasons +! print*,'algo_trust1 sends the message : Exit' +! !### exit ### +! endif +! +! !### TODO ### +! ! Compute x -> m_x +! ! Compute m_x -> R +! ! Apply R and keep the previous MOs... +! ! Update/touch +! ! Compute the new criterion/energy -> criterion +! +! call #your_routine_1D_to_2D_antisymmetric_array(x,m_x) +! call #your_routine_2D_antisymmetric_array_to_rotation_matrix(m_x,R) +! call #your_routine_to_apply_the_rotation_matrix(R,prev_mos) +! +! TOUCH #your_variables +! +! call #your_criterion(criterion) +! +! ! Criterion -> step accepted or rejected +! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) +! +! ! ### TODO ### +! !if (cancel_step) then +! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) +! !endif +! #if (cancel_step) then +! #mo_coef = prev_mos +! #endif +! +! enddo +! +! !call save_mos() !### depend of the time for 1 iteration +! +! ! To exit the external loop if must_exit = .True. +! if (must_exit) then +! !### exit ### +! endif +! +! ! Step accepted, nb iteration + 1 +! nb_iter = nb_iter + 1 +! +! ! ### TODO ### +! !if (###Conditions###) then +! ! no_converged = .False. +! !endif +! #if (#your_conditions) then +! # not_converged = .False. +! #endif +! +! enddo +#+END_SRC + +Variables: + +Input: +| n | integer | m*(m-1)/2 | +| m | integer | number of mo in the mo_class | +| H(n,n) | double precision | Hessian | +| v_grad(n) | double precision | Gradient | +| W(n,n) | double precision | Eigenvectors of the hessian | +| e_val(n) | double precision | Eigenvalues of the hessian | +| criterion | double precision | Actual criterion | +| prev_criterion | double precision | Value of the criterion before the first iteration/after the previous iteration | +| rho | double precision | Given by is_step_cancel_trus_region | +| | | Agreement between the real function and the Taylor series (2nd order) | +| nb_iter | integer | Actual number of iterations | + +Input/output: +| delta | double precision | Radius of the trust region | + +Output: +| criterion_model | double precision | Predicted criterion after the rotation | +| x(n) | double precision | Step | +| must_exit | logical | If the program must exit the loop | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the step and the expected criterion/energy after the step + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) + double precision, intent(in) :: rho, prev_criterion + + ! inout + double precision, intent(inout) :: delta, e_val(n) + + ! out + double precision, intent(out) :: criterion_model, x(n) + logical, intent(out) :: must_exit + + ! internal + integer :: info + + must_exit = .False. + + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) + + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) + + ! exit if DABS(prev_criterion - criterion_model) < 1d-12 + if (DABS(prev_criterion - criterion_model) < thresh_model) then + print*,'' + print*,'###############################################################################' + print*,'DABS(prev_criterion - criterion_model) <', thresh_model, 'stop the trust region' + print*,'###############################################################################' + print*,'' + must_exit = .True. + endif + + if (delta < thresh_delta) then + print*,'' + print*,'##############################################' + print*,'Delta <', thresh_delta, 'stop the trust region' + print*,'##############################################' + print*,'' + must_exit = .True. + endif + + ! Add after the call to this subroutine, a statement: + ! "if (must_exit) then + ! exit + ! endif" + ! in order to exit the optimization loop + +end subroutine +#+END_SRC + +Variables: + +Input: +| nb_iter | integer | actual number of iterations | +| prev_criterion | double precision | criterion before the application of the step x | +| criterion | double precision | criterion after the application of the step x | +| criterion_model | double precision | predicted criterion after the application of x | + +Output: +| rho | double precision | Agreement between the predicted criterion and the real new criterion | +| cancel_step | logical | If the step must be cancelled | + +#+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f +subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) + + include 'pi.h' + + !BEGIN_DOC + ! Compute if the step should be cancelled + !END_DOC + + implicit none + + ! in + double precision, intent(in) :: prev_criterion, criterion, criterion_model + + ! inout + integer, intent(inout) :: nb_iter + + ! out + logical, intent(out) :: cancel_step + double precision, intent(out) :: rho + + ! Computes rho + call trust_region_rho(prev_criterion,criterion,criterion_model,rho) + + if (nb_iter == 0) then + nb_iter = 1 ! in order to enable the change of delta if the first iteration is cancelled + endif + + ! If rho < thresh_rho -> give something in output to cancel the step + if (rho >= thresh_rho) then !0.1d0) then + ! The step is accepted + cancel_step = .False. + else + ! The step is rejected + cancel_step = .True. + print*, '***********************' + print*, 'Step cancel : rho <', thresh_rho + print*, '***********************' + endif + +end subroutine +#+END_SRC + +** Template for MOs +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_mos.txt +subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_R(:,:), R(:,:), tmp_x(:), tmp_m_x(:,:) + double precision, allocatable :: prev_mos(:,:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit, enforce_step_cancellation + integer :: nb_iter, info, nb_sub_iter + integer :: i,j,tmp_i,tmp_j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n),tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Provide the criterion, but unnecessary because it's done + ! automatically + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! The new hessian and gradient are computed at the end of the previous iteration + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Forces the step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! touch mo_coef + call clear_mo_map ! Only if you are using the bi-electronic integrals + ! mo_coef becomes valid + ! And avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! To enforce the program to provide new criterion after the update + ! of the parameters + FREE C_PROVIDER + PROVIDE C_PROVIDER + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step ? + if (cancel_step) then + ! Replacement by the previous MOs + mo_coef = prev_mos + ! call save_mos() ! depends of the time for 1 iteration + + ! No need to clear_mo_map since we don't recompute the gradient and the hessian + ! mo_coef becomes valid + ! Avoid the recomputation of the providers which depend of mo_coef + TOUCH mo_coef H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + else + ! The step is accepted: + ! criterion -> prev criterion + + ! The replacement "criterion -> prev criterion" is already done + ! in trust_region_rho, so if the criterion does not have a reason + ! to change, it will change nothing for the criterion and will + ! force the program to provide the new hessian, gradient and + ! convergence criterion for the next iteration. + ! But in the case of orbital optimization we diagonalize the CI + ! matrix after the "FREE" statement, so the criterion will change + + FREE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + PROVIDE C_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + prev_criterion = C_PROVIDER + + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! call save_mos() ! depends of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! Provide the convergence criterion + ! Provide the gradient and the hessian for the next iteration + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + ! Save the final MOs + call save_mos() + + ! Diagonalization of the hessian + ! (To see the eigenvalues at the end of the optimization) + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + deallocate(e_val, W, tmp_R, R, tmp_x, prev_mos) + +end +#+END_SRC + +** Cartesian version +#+BEGIN_SRC f90 :comments org :tangle trust_region_template_xyz.txt +subroutine algo_trust_cartesian_template(tmp_n) + + implicit none + + ! Variables + + ! In + integer, intent(in) :: tmp_n + + ! Out + ! Rien ou un truc pour savoir si ça c'est bien passé + + ! Internal + double precision, allocatable :: e_val(:), W(:,:), tmp_x(:) + double precision :: criterion, prev_criterion, criterion_model + double precision :: delta, rho + logical :: not_converged, cancel_step, must_exit + integer :: nb_iter, nb_sub_iter + integer :: i,j + + allocate(W(tmp_n, tmp_n),e_val(tmp_n),tmp_x(tmp_n)) + + PROVIDE C_PROVIDER X_PROVIDER H_PROVIDER g_PROVIDER + + ! Initialization + delta = 0d0 + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must start at 0.5 + not_converged = .True. ! Must be true + + ! Compute the criterion before the loop + prev_criterion = C_PROVIDER + + do while (not_converged) + + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + if (nb_iter > 0) then + PROVIDE H_PROVIDER g_PROVIDER + endif + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H_PROVIDER, e_val, W) + + cancel_step = .True. ! To enter in the loop just after + nb_sub_iter = 0 + + ! Loop to Reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + + print*,'-----------------------------' + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + if (must_exit) then + ! if step_in_trust_region sets must_exit on true for numerical reasons + print*,'trust_region_step_w_expected_e sent the message : Exit' + exit + endif + + ! New coordinates, check the sign + X_PROVIDER = X_PROVIDER - tmp_x + + ! touch X_PROVIDER + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER cc_PROVIDER + + ! To update the other parameters if needed + call #update_parameters() + + ! New criterion + PROVIDE C_PROVIDER ! Unnecessary + criterion = C_PROVIDER + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancel the previous step + if (cancel_step) then + ! Replacement by the previous coordinates, check the sign + X_PROVIDER = X_PROVIDER + tmp_x + + ! Avoid the recomputation of the hessian and the gradient + TOUCH X_PROVIDER H_PROVIDER g_PROVIDER C_PROVIDER cc_PROVIDER + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + PROVIDE cc_PROVIDER + + ! To exit + if (dabs(cc_PROVIDER) < thresh_opt_max_elem_grad) then + not_converged = .False. + endif + + if (nb_iter > optimization_max_nb_iter) then + not_converged = .False. + endif + + if (delta < thresh_delta) then + not_converged = .False. + endif + + enddo + + deallocate(e_val, W, tmp_x) + +end +#+END_SRC + +** Script template +#+BEGIN_SRC bash :tangle script_template_mos.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_mos.txt > $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + +#+BEGIN_SRC bash :tangle script_template_xyz.sh +#!/bin/bash + +your_file= + +your_C_PROVIDER= +your_X_PROVIDER= +your_H_PROVIDER= +your_g_PROVIDER= +your_cc_PROVIDER= + +sed "s/C_PROVIDER/$your_C_PROVIDER/g" trust_region_template_xyz.txt > $your_file +sed -i "s/X_PROVIDER/$your_X_PROVIDER/g" $your_file +sed -i "s/H_PROVIDER/$your_H_PROVIDER/g" $your_file +sed -i "s/g_PROVIDER/$your_g_PROVIDER/g" $your_file +sed -i "s/cc_PROVIDER/$your_cc_PROVIDER/g" $your_file +#+END_SRC + diff --git a/src/utils_trust_region/org/apply_mo_rotation.org b/src/utils_trust_region/org/apply_mo_rotation.org new file mode 100644 index 00000000..955997e9 --- /dev/null +++ b/src/utils_trust_region/org/apply_mo_rotation.org @@ -0,0 +1,86 @@ +* Apply MO rotation +Subroutine to apply the rotation matrix to the coefficients of the +MOs. + +New MOs = Old MOs . Rotation matrix + +*Compute the new MOs with the previous MOs and a rotation matrix* + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| mo_coef(ao_num,mo_num) | double precision | coefficients of the MOs | + +Intent in: +| R(mo_num,mo_num) | double precision | rotation matrix | + +Intent out: +| prev_mos(ao_num,mo_num) | double precision | MOs before the rotation | + +Internal: +| new_mos(ao_num,mo_num) | double precision | MOs after the rotation | +| i,j | integer | indexes | +#+BEGIN_SRC f90 :comments org :tangle apply_mo_rotation.irp.f +subroutine apply_mo_rotation(R,prev_mos) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the new MOs knowing the rotation matrix + !END_DOC + + implicit none + + ! Variables + + ! in + double precision, intent(in) :: R(mo_num,mo_num) + + ! out + double precision, intent(out) :: prev_mos(ao_num,mo_num) + + ! internal + double precision, allocatable :: new_mos(:,:) + integer :: i,j + double precision :: t1,t2,t3 + + print*,'' + print*,'---apply_mo_rotation---' + + call wall_time(t1) + + ! Allocation + allocate(new_mos(ao_num,mo_num)) + + ! Calculation + + ! Product of old MOs (mo_coef) by Rotation matrix (R) + call dgemm('N','N',ao_num,mo_num,mo_num,1d0,mo_coef,size(mo_coef,1),R,size(R,1),0d0,new_mos,size(new_mos,1)) + + prev_mos = mo_coef + mo_coef = new_mos + + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif + + ! Save the new MOs and change the label + mo_label = 'MCSCF' + !call save_mos + call ezfio_set_determinants_mo_label(mo_label) + + !print*,'Done, MOs saved' + + ! Deallocation, end + deallocate(new_mos) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in apply mo rotation:', t3 + print*,'---End apply_mo_rotation---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/mat_to_vec_index.org b/src/utils_trust_region/org/mat_to_vec_index.org new file mode 100644 index 00000000..50840584 --- /dev/null +++ b/src/utils_trust_region/org/mat_to_vec_index.org @@ -0,0 +1,63 @@ +* Matrix to vector index + +*Compute the index i of a vector element from the indexes p,q of a +matrix element* + +Lower diagonal matrix (p,q), p > q -> vector (i) + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| p,q | integer | indexes of a matrix element in the lower diagonal | +| | | p > q, q -> column | +| | | p -> row, | +| | | q -> column | + +Input: +| i | integer | corresponding index in the vector | + +#+BEGIN_SRC f90 :comments org :tangle mat_to_vec_index.irp.f +subroutine mat_to_vec_index(p,q,i) + + include 'pi.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: p,q + + ! out + integer, intent(out) :: i + + ! internal + integer :: a,b + double precision :: da + + ! Calculation + + a = p-1 + b = a*(a-1)/2 + + i = q+b + +end subroutine +#+END_SRC + diff --git a/src/utils_trust_region/org/rotation_matrix.org b/src/utils_trust_region/org/rotation_matrix.org new file mode 100644 index 00000000..3b2ff437 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix.org @@ -0,0 +1,452 @@ +* Rotation matrix + +*Build a rotation matrix from an antisymmetric matrix* + +Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +$$ +\textbf{R}=\exp(\textbf{A}) +$$ + +So : +\begin{align*} +\textbf{R}=& \exp(\textbf{A}) \\ +=& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +=& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +\end{align*} + +With : +$\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +$\tau$ : $\sqrt{-x}$ +$x$ : eigenvalues of $\textbf{A}^2$ + +Input: +| A(n,n) | double precision | antisymmetric matrix | +| n | integer | number of columns of the A matrix | +| LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +| LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +Output: +| R(n,n) | double precision | Rotation matrix | +| info | integer | if info = 0, the execution is successful | +| | | if info = k, the k-th parameter has an illegal value | +| | | if info = -k, the algorithm failed | + +Internal: +| B(n,n) | double precision | B = A.A | +| work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +| lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +| W(n,n) | double precision | eigenvectors of B | +| e_val(n) | double precision | eigenvalues of B | +| m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +| cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +| sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +| tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +| part_1(n,n) | double precision | matrix W.cos_tau.W^t | +| part_1a(n,n) | double precision | matrix cos_tau.W^t | +| part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +| part_2a(n,n) | double precision | matrix W^t.A | +| part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +| part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +| RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +| norm | integer | norm of R.R^t-1, must be equal to 0 | +| i,j | integer | indexes | + +Functions: +| dnrm2 | double precision | Lapack function, compute the norm of a matrix | +| disnan | logical | Lapack function, check if an element is NaN | + + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) +#+END_SRC + +** Pre-conditions +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Initialization + info=0 + enforce_step_cancellation = .False. + + ! Size of matrix A must be at least 1 by 1 + if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return + endif + + ! Leading dimension of A must be >= n + if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return + endif + + ! Leading dimension of A must be >= n + if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return + endif + + ! Matrix elements of A must by non-NaN + do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo + enddo + + do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif + enddo + + do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo + enddo + + ! Fix for too big elements ! bad idea better to cancel if the error is too big + !do j = 1, n + ! do i = 1, n + ! A(i,j) = mod(A(i,j),2d0*pi) + ! if (dabs(A(i,j)) > pi) then + ! A(i,j) = 0d0 + ! endif + ! enddo + !enddo + + max_elem_A = 0d0 + do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo + enddo + !print*,'max element in A', max_elem_A + + if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' + endif + +#+END_SRC + +** Calculations + +*** B=A.A + - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ + - Diagonalization of $\textbf{B}$ + W, the eigenvectors + e_val, the eigenvalues + + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Compute B=A.A + + call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + + ! Copy B in W, diagonalization will put the eigenvectors in W + W=B + + ! Diagonalization of B + ! Eigenvalues -> e_val + ! Eigenvectors -> W + lwork = 3*n-1 + allocate(work(lwork,n)) + + !print*,'Starting diagonalization ...' + + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + + deallocate(work) + + if (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' + elseif (info2 >0) then + print*, "WARNING: Diagonalization failed to converge" + endif + #+END_SRC + +*** Tau^-1, cos(tau), sin(tau) + $$\tau = \sqrt{-x}$$ + - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ + - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ + - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ + These matrices are diagonals + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Diagonal matrix m_diag + do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif + enddo + + m_diag = 0.d0 + do i = 1, n + m_diag(i,i) = e_val(i) + enddo + + ! cos_tau + do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo + enddo + + ! sin_tau + do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo + enddo + + ! Debug, display the cos_tau and sin_tau matrix + !if (debug) then + ! print*, 'cos_tau' + ! do i = 1, n + ! print*, cos_tau(i,:) + ! enddo + ! print*, 'sin_tau' + ! do i = 1, n + ! print*, sin_tau(i,:) + ! enddo + !endif + + ! tau^-1 + do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo + enddo + + max_elem = 0d0 + do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif + enddo + !print*,'max elem tau^-1:', max_elem + + ! Debug + !print*,'eigenvalues:' + !do i = 1, n + ! print*, e_val(i) + !enddo + + !Debug, display tau^-1 + !if (debug) then + ! print*, 'tau^-1' + ! do i = 1, n + ! print*,tau_m1(i,:) + ! enddo + !endif + #+END_SRC + +*** Rotation matrix + \begin{align*} + \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + \begin{align*} + \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \end{align*} + \begin{align*} + \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} + \end{align*} + + First: + part_1 = dgemm(W, dgemm(cos_tau, W^t)) + part_1a = dgemm(cos_tau, W^t) + part_1 = dgemm(W, part_1a) + And: + part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) + part_2a = dgemm(W^t, A) + part_2b = dgemm(sin_tau, part_2a) + part_2c = dgemm(tau_m1, part_2b) + part_2 = dgemm(W, part_2c) + Finally: + Rotation matrix, R = part_1+part_2 + + If $R$ is a rotation matrix: + $R.R^T=R^T.R=\textbf{1}$ + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! part_1 + call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + + ! part_2 + call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) + call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) + call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) + call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + + ! Rotation matrix R + R = part_1 + part_2 + + ! Matrix check + ! R.R^t and R^t.R must be equal to identity matrix + do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo + enddo + + call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + + norm = dnrm2(n*n,RR_t,1) + !print*, 'Rotation matrix check, norm R.R^T = ', norm + + ! Debug + !if (debug) then + ! print*, 'RR_t' + ! do i = 1, n + ! print*, RR_t(i,:) + ! enddo + !endif + #+END_SRC + +*** Post conditions + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + ! Check if R.R^T=1 + max_elem = 0d0 + do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo + enddo + + print*, 'Max error in R.R^T:', max_elem + !print*, 'e_val(1):', e_val(1) + !print*, 'e_val(n):', e_val(n) + !print*, 'max elem in A:', max_elem_A + + if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. + endif + + ! Matrix elements of R must by non-NaN + do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo + enddo + + ! Display + !if (debug) then + ! print*,'Rotation matrix :' + ! do i = 1, n + ! write(*,'(100(F10.5))') R(i,:) + ! enddo + !endif + #+END_SRC + +** Deallocation, end + #+BEGIN_SRC f90 :comments org :tangle rotation_matrix.irp.f + deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine + #+END_SRC + diff --git a/src/utils_trust_region/org/rotation_matrix_iterative.org b/src/utils_trust_region/org/rotation_matrix_iterative.org new file mode 100644 index 00000000..f6cc9909 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix_iterative.org @@ -0,0 +1,136 @@ +* Rotation matrix with the iterative method + +\begin{align*} +\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +\end{align*} + +!!! Doesn't work !!! + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end +#+END_SRC + +** Factorial +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/org/sub_to_full_rotation_matrix.org b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org new file mode 100644 index 00000000..f0cf0bfc --- /dev/null +++ b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org @@ -0,0 +1,65 @@ +* Rotation matrix in a subspace to rotation matrix in the full space + +Usually, we are using a list of MOs, for exemple the active ones. When +we compute a rotation matrix to rotate the MOs, we just compute a +rotation matrix for these MOs in order to reduce the size of the +matrix which has to be computed. Since the computation of a rotation +matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +reuce the number of MOs involved. +After that we replace the rotation matrix in the full space by +building the elements of the rotation matrix in the full space from +the elements of the rotation matrix in the subspace and adding some 0 +on the extradiagonal elements and some 1 on the diagonal elements, +for the MOs that are not involved in the rotation. + +Provided: +| mo_num | integer | Number of MOs | + +Input: +| m | integer | Size of tmp_list, m <= mo_num | +| tmp_list(m) | integer | List of MOs | +| tmp_R(m,m) | double precision | Rotation matrix in the space of | +| | | the MOs containing by tmp_list | + +Output: +| R(mo_num,mo_num | double precision | Rotation matrix in the space | +| | | of all the MOs | + +Internal: +| i,j | integer | indexes in the full space | +| tmp_i,tmp_j | integer | indexes in the subspace | + +#+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_expected_e.org b/src/utils_trust_region/org/trust_region_expected_e.org new file mode 100644 index 00000000..9d2868fa --- /dev/null +++ b/src/utils_trust_region/org/trust_region_expected_e.org @@ -0,0 +1,128 @@ +* Predicted energy : e_model + +*Compute the energy predicted by the Taylor series* + +The energy is predicted using a Taylor expansion truncated at te 2nd +order : + +\begin{align*} +E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +\end{align*} + +Input: +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | + +Output: +| e_model | double precision | predicted energy after the rotation of the MOs | + +Internal: +| part_1 | double precision | v_grad^T.x | +| part_2 | double precision | 1/2 . x^T.H.x | +| part_2a | double precision | H.x | +| i,j | integer | indexes | + +Function: +| ddot | double precision | dot product (Lapack) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) +#+END_SRC + +** Calculations + +part_1 corresponds to the product g.x +part_2a corresponds to the product H.x +part_2 corresponds to the product 0.5*(x^T.H.x) + +TODO: remove the dot products + +#+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f + ! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + ! print*,'g.x : ', part_1 + !endif + + ! Product H.x + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + ! print*,'1/2*x^T.H.x : ', part_2 + !endif + + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_optimal_lambda.org b/src/utils_trust_region/org/trust_region_optimal_lambda.org new file mode 100644 index 00000000..ff454cb6 --- /dev/null +++ b/src/utils_trust_region/org/trust_region_optimal_lambda.org @@ -0,0 +1,1661 @@ +* Newton's method to find the optimal lambda + +*Compute the lambda value for the trust region* + +This subroutine uses the Newton method in order to find the optimal +lambda. This constant is added on the diagonal of the hessian to shift +the eiganvalues. It has a double role: +- ensure that the resulting hessian is positive definite for the + Newton method +- constrain the step in the trust region, i.e., + $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius + of the trust region. +We search $\lambda$ which minimizes +\begin{align*} + f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +\end{align*} +or +\begin{align*} + \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +\end{align*} +and gives obviously 0 in both cases. \newline + +There are several cases: +- If $\textbf{H}$ is positive definite the interval containing the + solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$ then the interval containing + the solution is $\lambda \in (-h_1, \infty)$. +- If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot + \textbf{g} = 0$ then the interval containing the solution is + $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < + 10^{-12}$ are not computed, so the term where $i = 1$ is + automatically removed and this case becomes similar to the previous one. + +So to avoid numerical problems (cf. trust_region) we start the +algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +with $\epsilon$ a little constant. +The research must be restricted to the interval containing the +solution. For that reason a little trust region in 1D is used. + +The Newton method to find the optimal $\lambda$ is : +\begin{align*} + \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +\end{align*} +$f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +$\lambda$ at the l-th iteration, +$f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +$\lambda$ at the l-th iteration.\newline + +Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +\begin{align*} + y \leq \alpha +\end{align*} +with $\alpha$ a scalar representing the trust length (trust region in +1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +Taylor series truncated at the second order. Thus, if $y > \alpha$, +the constraint is applied as +\begin{align*} + y^* = \alpha \frac{y}{|y|} +\end{align*} +with $y^*$ the solution in the trust region. + +The size of the trust region evolves in function of $\rho$ as for the +trust region seen previously cf. trust_region, rho_model. +The prediction of the value of $f$ or $\tilde{f}$ is done using the +Taylor series truncated at the second order cf. "trust_region", +"trust_e_model". + +The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 + = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) + \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} +\begin{align*} +\frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +1/\Delta^2)^2$ with respect to $\lambda$ are: +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} + {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} + {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +Provided in qp_edit: +| thresh_rho_2 | +| thresh_cc | +| nb_it_max_lambda | +| version_lambda_search | +| nb_it_max_pre_search | +see qp_edit for more details + +Input: +| n | integer | m*(m-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +| delta | double precision | delta for the trust region | + +Output: +| lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +| | | lambda > 0 | + +Internal: +| d1_N | double precision | value of d1_norm_trust_region | +| d2_N | double precision | value of d2_norm_trust_region | +| f_N | double precision | value of f_norm_trust_region | +| prev_f_N | double precision | previous value of f_norm_trust_region | +| f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +| prev_f_R | double precision | previous value of f_R | +| model | double precision | predicted value of f_R from prev_f_R and y | +| d_1 | double precision | value of the first derivative | +| d_2 | double precision | value of the second derivative | +| y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +| prev_lambda | double precision | previous value of lambda | +| t1,t2,t3 | double precision | wall time | +| i | integer | index | +| epsilon | double precision | little constant to avoid numerical problem | +| rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +| version | integer | version of the root finding method | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +| f_norm_trust_region | double precision | value of norm(x)^2 | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif + ! Version 2 is normally better +#+END_SRC + +Resolution with the Newton method: + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f + ! Initialization + epsilon = 1d-4 + lambda = max(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + !print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + !print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + !print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + !write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif + + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + !print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + !print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'---End trust_newton---' + +end subroutine +#+END_SRC + +* OMP: First derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| mo_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function +#+END_SRC + +* OMP: Second derivative of (||x||^2 - Delta^2)^2 + +*Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +This function computes the second derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. +\begin{align*} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{align*} + +\begin{align*} + \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ + \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ + \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_accu2 | double precision | temporary array for the third sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function +#+END_SRC + +* OMP: Function value of ||x||^2 + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | + +Internal: +| tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +| tmp_fN | double precision | temporary array for the function | +| i,j | integer | indexes | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function +#+END_SRC + +* First derivative of (||x||^2 - Delta^2)^2 +Version without omp + +*Function to compute the first derivative of ||x||^2 - Delta* + +This function computes the first derivative of (||x||^2 - Delta^2)^2 +with respect to lambda. + +\begin{align*} +\frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +\left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function +#+END_SRC + +* Second derivative of (||x||^2 - Delta^2)^2 +Version without OMP + +*Function to compute the second derivative of ||x||^2 - Delta* + + +\begin{equation} +\frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 += 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +\end{equation} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| accu1 | double precision | first sum of the formula | +| accu2 | double precision | second sum of the formula | +| accu3 | double precision | third sum of the formula | +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +| ddot | double precision | blas dot product | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function +#+END_SRC + +* Function value of ||x||^2 +Version without OMP + +*Compute the value of ||x||^2* + +This function computes the value of ||x(lambda)||^2 + +\begin{align*} +||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| f_norm_trust_region | double precision | value of norm(x)^2 | +| ddot | double precision | blas dot product | + + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function +#+END_SRC + +* OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end +#+END_SRC + +* OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version with OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| tmp_accu1 | double precision | temporary array for the first sum | +| tmp_accu2 | double precision | temporary array for the second sum | +| tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end +#+END_SRC + +* First derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} + {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ + &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} + \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +\end{align*} +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +\end{align*} +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d1_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end +#+END_SRC + +* Second derivative of (1/||x||^2 - 1/Delta^2)^2 +Version without OMP + +*Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +\begin{align*} + \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 + &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ + &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} + - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +\end{align*} + +\begin{align*} +\text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +\text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +\text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +\end{align*} + +Provided: +| m_num | integer | number of MOs | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n,n) | double precision | eigenvectors of the hessian | +| v_grad(n) | double precision | gradient | +| lambda | double precision | Lagrange multiplier | +| delta | double precision | Delta of the trust region | + +Internal: +| wtg | double precision | temporary variable to store W^T.v_grad | +| i,j | integer | indexes | + +Function: +| d2_norm_inverse_trust_region | double precision | value of the first derivative | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_rho.org b/src/utils_trust_region/org/trust_region_rho.org new file mode 100644 index 00000000..b669da8c --- /dev/null +++ b/src/utils_trust_region/org/trust_region_rho.org @@ -0,0 +1,122 @@ +* Agreement with the model: Rho + +*Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +Rho represents the agreement between the model (the predicted energy +by the Taylor expansion truncated at the 2nd order) and the real +energy : + +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} +With : +$E^{k}$ the energy at the previous iteration +$E^{k+1}$ the energy at the actual iteration +$m^{k+1}$ the predicted energy for the actual iteration +(cf. trust_e_model) + +If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +If $\rho \leq 0$ the previous energy is lower than the actual +energy. We have to cancel the last step and use a smaller trust +region. +Here we cancel the last step if $\rho < 0.1$, because even if +the energy decreases, the agreement is bad, i.e., the Taylor expansion +truncated at the second order doesn't represent correctly the energy +landscape. So it's better to cancel the step and restart with a +smaller trust region. + +Provided in qp_edit: +| thresh_rho | + +Input: +| prev_energy | double precision | previous energy (energy before the rotation) | +| e_model | double precision | predicted energy after the rotation | + +Output: +| rho | double precision | the agreement between the model (predicted) and the real energy | +| prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +| | | else the previous energy doesn't change | + +Internal: +| energy | double precision | energy (real) after the rotation | +| i | integer | index | +| t* | double precision | time | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + !call wall_time(t1) +#+END_SRC + +** Rho +\begin{equation} +\rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +\end{equation} + +In function of $\rho$ th step can be accepted or cancelled. + +If we cancel the last step (k+1), the previous energy (k) doesn't +change! +If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_rho.irp.f + ! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + !print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy + endif + + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/trust_region_step.org b/src/utils_trust_region/org/trust_region_step.org new file mode 100644 index 00000000..0e5f090f --- /dev/null +++ b/src/utils_trust_region/org/trust_region_step.org @@ -0,0 +1,759 @@ +* Trust region + +*Compute the next step with the trust region algorithm* + +The Newton method is an iterative method to find a minimum of a given +function. It uses a Taylor series truncated at the second order of the +targeted function and gives its minimizer. The minimizer is taken as +the new position and the same thing is done. And by doing so +iteratively the method find a minimum, a local or global one depending +of the starting point and the convexity/nonconvexity of the targeted +function. + +The goal of the trust region is to constrain the step size of the +Newton method in a certain area around the actual position, where the +Taylor series is a good approximation of the targeted function. This +area is called the "trust region". + +In addition, in function of the agreement between the Taylor +development of the energy and the real energy, the size of the trust +region will be updated at each iteration. By doing so, the step sizes +are not too larges. In addition, since we add a criterion to cancel the +step if the energy increases (more precisely if rho < 0.1), so it's +impossible to diverge. \newline + +References: \newline +Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +ISBN: 978-0-387-40065-5 \newline + +By using the first and the second derivatives, the Newton method gives +a step: +\begin{align*} + \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot + \textbf{g}_{(k)} +\end{align*} +which leads to the minimizer of the Taylor series. +!!! Warning: the Newton method gives the minimizer if and only if +$\textbf{H}$ is positive definite, else it leads to a saddle point !!! +But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +\begin{align*} + ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +\end{align*} +which is equivalent to +\begin{align*} + \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +\end{align*} + +with: \newline +$\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +size n) \newline +$\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +matrix) \newline +$\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +size n) \newline +$\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +\newline + +Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +hypersphere of radius $\Delta_{(k+1)}$.\newline + +So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +$\textbf{H}$ is positive definite, the +solution is the step given by the Newton method +$\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +Else we have to constrain the step size. For simplicity we will remove +the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +Starting from the Taylor series of a function E (here, the energy) +truncated at the 2nd order, we have: +\begin{align*} + E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} + \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \mathcal{O}(\textbf{x}^2) +\end{align*} + +With the constraint on the norm of $\textbf{x}$ we can write the +Lagrangian +\begin{align*} + \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} + + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +\end{align*} +Where: \newline +$\lambda$ is the Lagrange multiplier \newline +$E$ is the energy at the k-th iteration $\Leftrightarrow +E(\textbf{x} = \textbf{0})$ \newline + +To solve this equation, we search a stationary point where the first +derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +\end{align*} + +The derivative is: +\begin{align*} + \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} + = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +\end{align*} + +So, we search $\textbf{x}$ such as: +\begin{align*} +\frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} += \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +\end{align*} + +We can rewrite that as: +\begin{align*} + \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} + = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +\end{align*} +with $\textbf{I}$ is the identity matrix. + +By doing so, the solution is: +\begin{align*} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +\end{align*} +\begin{align*} + \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} +with $\textbf{x}^T \textbf{x} = \Delta^2$. + +We have to solve this previous equation to find this $\textbf{x}$ in the +trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +just a one dimension problem because we can express $\textbf{x}$ as a +function of $\lambda$: +\begin{align*} + \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +\end{align*} + +We start from the fact that the hessian is diagonalizable. So we have: +\begin{align*} + \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +\end{align*} +with: \newline +$\textbf{H}$, the hessian matrix \newline +$\textbf{W}$, the matrix containing the eigenvectors \newline +$\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +$\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +$h_i$, the i-th eigenvalue in ascending order \newline + +Now we use the fact that adding a constant on the diagonal just shifts +the eigenvalues: +\begin{align*} + \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} + +\textbf{I} \lambda) \cdot \textbf{W}^T +\end{align*} + +By doing so we can express $\textbf{x}$ as a function of $\lambda$ +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} +with $\lambda \neq - h_i$. + +An interesting thing in our case is the norm of $\textbf{x}$, +because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +\begin{align*} + ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot + \textbf{g})^2}{(h_i + \lambda)^2} +\end{align*} + +So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +And if we study the properties of this function we see that: +\begin{align*} + \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +\end{align*} +and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +\begin{align*} + \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +\end{align*} + +From these limits and knowing that $h_1$ is the lowest eigenvalue, we +can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +strictly decreasing function on the interval $\lambda \in +(-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +solution. + +Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +\textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +$\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +Newton method is only defined for a positive definite hessian matrix, +so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +definite. Consequently, in the case where $\textbf{H}$ is not positive +definite, to ensure the positive definiteness, $\lambda$ must be +greater than $- h_1$. +\begin{align*} + \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +\end{align*} + +From that there are five cases: +- if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +- if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot + \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} + \lambda)$ + must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +- if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot + \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing + $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be + positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is + similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = + 0)|| \leq \Delta$ + but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point + +Thus to find the solution, we can write: +\begin{align*} + ||\textbf{x}(\lambda)|| = \Delta +\end{align*} +\begin{align*} + ||\textbf{x}(\lambda)|| - \Delta = 0 +\end{align*} + +Taking the square of this equation +\begin{align*} + (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +\end{align*} +we have a function with one minimum for the optimal $\lambda$. +Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +\begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +\end{align*} + +But in practice, it is more effective to solve: +\begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +\end{align*} + +To do that, we just use the Newton method with "trust_newton" using +first and second derivative of $(||\textbf{x}(\lambda)||^2 - +\Delta^2)^2$ with respect to $\textbf{x}$. +This will give the optimal $\lambda$ to compute the +solution $\textbf{x}$ with the formula seen previously: +\begin{align*} + \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +\end{align*} + +The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Evolution of the trust region + +We initialize the trust region at the first iteration using a radius +\begin{align*} + \Delta = ||\textbf{x}(\lambda=0)|| +\end{align*} + +And for the next iteration the trust region will evolves depending of +the agreement of the energy prediction based on the Taylor series +truncated at the 2nd order and the real energy. If the Taylor series +truncated at the 2nd order represents correctly the energy landscape +the trust region will be extent else it will be reduced. In order to +mesure this agreement we use the ratio rho cf. "rho_model" and +"trust_e_model". From that we use the following values: +- if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +- if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +- if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +- if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +In addition, if $\rho < 0.1$ the iteration is cancelled, so it +restarts with a smaller trust region until the energy decreases. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Summary + +To summarize, knowing the hessian (eigenvectors and eigenvalues), the +gradient and the radius of the trust region we can compute the norm of +the Newton step +\begin{align*} + ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n + \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +\end{align*} + +- if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and + $\textbf{x}(\lambda=0)$ is in the trust region and it is not + necessary to put a constraint on $\textbf{x}$, the solution is the + unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +- else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in + the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = + -h_1)$, similarly to the previous case. + But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ + time a constant to ensure the condition $||\textbf{x}(\lambda = + -h_1)|| = \Delta$ and escape from the saddle point +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we + have to search $\lambda \in (-h_1, \infty)$ such as + $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method + \begin{align*} + (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 + \end{align*} + or + \begin{align*} + (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 + \end{align*} + which is numerically more stable. And finally compute + \begin{align*} + \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot + \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i + \end{align*} +- else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we + do exactly the same thing that the previous case but we search + $\lambda \in (0, \infty)$ +- else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and + $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the + sum), again we do exactly the same thing that the previous case + searching $\lambda \in (-h_1, \infty)$. + + +For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +necessary in fact to remove the $j = 1$ in the sum since the term +where $h_i - \lambda < 10^{-6}$ are not computed. + +After that, we take this vector $\textbf{x}^*$, called "x", and we do +the transformation to an antisymmetric matrix $\textbf{X}$, called +m_x. This matrix $\textbf{X}$ will be used to compute a rotation +matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +NB: +An improvement can be done using a elleptical trust region. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +#+END_SRC + +** Code + +Provided: +| mo_num | integer | number of MOs | + +Cf. qp_edit in orbital optimization section, for some constants/thresholds + +Input: +| m | integer | number of MOs | +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| H(n,n2) | double precision | hessian | +| v_grad(n) | double precision | gradient | +| e_val(n) | double precision | eigenvalues of the hessian | +| W(n, n) | double precision | eigenvectors of the hessian | +| rho | double precision | agreement between the model and the reality, | +| | | represents the quality of the energy prediction | +| nb_iter | integer | number of iteration | + +Input/Ouput: +| delta | double precision | radius of the trust region | + +Output: +| x(n) | double precision | vector containing the step | + +Internal: +| accu | double precision | temporary variable to compute the step | +| lambda | double precision | lagrange multiplier | +| trust_radius2 | double precision | square of the radius of the trust region | +| norm2_x | double precision | norm^2 of the vector x | +| norm2_g | double precision | norm^2 of the vector containing the gradient | +| tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +| i, j, k | integer | indexes | + +Function: +| dnrm2 | double precision | Blas function computing the norm | +| f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) +#+END_SRC + + +*** Initialization and norm + +The norm of the step size will be useful for the trust region +algorithm. We start from a first guess and the radius of the trust +region will evolve during the optimization. + +avoid_saddle is actually a test to avoid saddle points + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialization of the Lagrange multiplier + lambda = 0d0 + + ! List of w^T.g, to avoid the recomputation + tmp_wtg = 0d0 + if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo + else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo + endif + + ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue + ! in the case of avoid_saddle + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + + endif + + ! Norm^2 of x, ||x||^2 + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta + ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm + ! Anyway if the step is too big it will be reduced + !print*,'||x||^2 :', norm2_x + + ! Norm^2 of the gradient, ||v_grad||^2 + norm2_g = (dnrm2(n,v_grad,1))**2 + !print*,'||grad||^2 :', norm2_g +#+END_SRC + +*** Trust radius initialization + + At the first iteration (nb_iter = 0) we initialize the trust region + with the norm of the step generate by the Newton's method ($\textbf{x}_1 = + (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, + we compute this norm using f_norm_trust_region_omp as explain just + below) + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! trust radius + if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) + endif +#+END_SRC + +*** Modification of the trust radius + +In function of rho (which represents the agreement between the model +and the reality, cf. rho_model) the trust region evolves. We update +delta (the radius of the trust region). + +To avoid too big trust region we put a maximum size. + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Modification of the trust radius in function of rho + if (rho >= 0.75d0) then + delta = 2d0 * delta + elseif (rho >= 0.5d0) then + delta = delta + elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta + else + delta = 0.25d0 * delta + endif + + ! Maximum size of the trust region + !if (delta > 0.5d0 * n * pi) then + ! delta = 0.5d0 * n * pi + ! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' + !endif + + if (delta > 1d10) then + delta = 1d10 + endif + + !print*, 'Delta :', delta +#+END_SRC + +*** Calculation of the optimal lambda + +We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +- If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant + $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +- If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the + unconstrained one, $\lambda = 0$ + +You will find more details at the beginning + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! By giving delta, we search (||x||^2 - delta^2)^2 = 0 + ! and not (||x||^2 - delta)^2 = 0 + + ! Research of lambda to solve ||x(lambda)|| = Delta + + ! Display + !print*, 'e_val(1) = ', e_val(1) + !print*, 'w_1^T.g =', tmp_wtg(1) + + ! H positive definite + if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + ! H indefinite + else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + + endif + + ! Recomputation of the norm^2 of the step x + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + print*,'' + print*,'Summary after the trust region:' + print*,'lambda:', lambda + print*,'||x||:', dsqrt(norm2_x) + print*,'delta:', delta +#+END_SRC + +*** Calculation of the step x + +x refers to $\textbf{x}^*$ +We compute x in function of lambda using its formula : +\begin{align*} +\textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i ++ \lambda} \cdot \textbf{w}_i +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f + ! Initialisation + x = 0d0 + + ! Calculation of the step x + + if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif + else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif + endif + + double precision :: beta, norm_x + + ! Test + ! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) + ! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first + ! eigenvectors multiply by a constant to ensure the condition + ! ||x(lambda=-e_val(1))|| = delta and escape the saddle point + if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif + endif +#+END_SRC + +*** Transformation of x + +x is a vector of size n, so it can be write as a m by m +antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +If a matrix is antisymmetric it can be reshaped as a vector. And the +vector can be reshaped as an antisymmetric matrix + +\begin{align*} +\begin{pmatrix} +0 & -1 & -2 & -4 \\ +1 & 0 & -3 & -5 \\ +2 & 3 & 0 & -6 \\ +4 & 5 & 6 & 0 +\end{pmatrix} +\Leftrightarrow +\begin{pmatrix} +1 & 2 & 3 & 4 & 5 & 6 +\end{pmatrix} +\end{align*} + +!!! Here the algorithm only work for the lower diagonal !!! + +Input: +| i | integer | index in the vector | + +Ouput: +| p,q | integer | corresponding indexes in the lower diagonal of a matrix | +| | | p > q, | +| | | p -> row, | +| | | q -> column | + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_index.irp.f +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine +#+END_SRC diff --git a/src/utils_trust_region/org/vec_to_mat_v2.org b/src/utils_trust_region/org/vec_to_mat_v2.org new file mode 100644 index 00000000..4ce5f5e1 --- /dev/null +++ b/src/utils_trust_region/org/vec_to_mat_v2.org @@ -0,0 +1,40 @@ +* Vect to antisymmetric matrix using mat_to_vec_index + +Vector to antisymmetric matrix transformation using mat_to_vec_index +subroutine. + +Can be done in OMP (for the first part and with omp critical for the second) + +#+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h new file mode 100644 index 00000000..2c36a9f0 --- /dev/null +++ b/src/utils_trust_region/pi.h @@ -0,0 +1,2 @@ + logical, parameter :: debug=.False. + double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f new file mode 100644 index 00000000..827af8c0 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -0,0 +1,441 @@ +! Rotation matrix + +! *Build a rotation matrix from an antisymmetric matrix* + +! Compute a rotation matrix $\textbf{R}$ from an antisymmetric matrix $$\textbf{A}$$ such as : +! $$ +! \textbf{R}=\exp(\textbf{A}) +! $$ + +! So : +! \begin{align*} +! \textbf{R}=& \exp(\textbf{A}) \\ +! =& \sum_k^{\infty} \frac{1}{k!}\textbf{A}^k \\ +! =& \textbf{W} \cdot \cos(\tau) \cdot \textbf{W}^{\dagger} + \textbf{W} \cdot \tau^{-1} \cdot \sin(\tau) \cdot \textbf{W}^{\dagger} \cdot \textbf{A} +! \end{align*} + +! With : +! $\textbf{W}$ : eigenvectors of $\textbf{A}^2$ +! $\tau$ : $\sqrt{-x}$ +! $x$ : eigenvalues of $\textbf{A}^2$ + +! Input: +! | A(n,n) | double precision | antisymmetric matrix | +! | n | integer | number of columns of the A matrix | +! | LDA | integer | specifies the leading dimension of A, must be at least max(1,n) | +! | LDR | integer | specifies the leading dimension of R, must be at least max(1,n) | + +! Output: +! | R(n,n) | double precision | Rotation matrix | +! | info | integer | if info = 0, the execution is successful | +! | | | if info = k, the k-th parameter has an illegal value | +! | | | if info = -k, the algorithm failed | + +! Internal: +! | B(n,n) | double precision | B = A.A | +! | work(lwork,n) | double precision | work matrix for dysev, dimension max(1,lwork) | +! | lwork | integer | dimension of the syev work array >= max(1, 3n-1) | +! | W(n,n) | double precision | eigenvectors of B | +! | e_val(n) | double precision | eigenvalues of B | +! | m_diag(n,n) | double precision | diagonal matrix with the eigenvalues of B | +! | cos_tau(n,n) | double precision | diagonal matrix with cos(tau) values | +! | sin_tau(n,n) | double precision | diagonal matrix with sin cos(tau) values | +! | tau_m1(n,n) | double precision | diagonal matrix with (tau)^-1 values | +! | part_1(n,n) | double precision | matrix W.cos_tau.W^t | +! | part_1a(n,n) | double precision | matrix cos_tau.W^t | +! | part_2(n,n) | double precision | matrix W.tau_m1.sin_tau.W^t.A | +! | part_2a(n,n) | double precision | matrix W^t.A | +! | part_2b(n,n) | double precision | matrix sin_tau.W^t.A | +! | part_2c(n,n) | double precision | matrix tau_m1.sin_tau.W^t.A | +! | RR_t(n,n) | double precision | R.R^t must be equal to the identity<=> R.R^t-1=0 <=> norm = 0 | +! | norm | integer | norm of R.R^t-1, must be equal to 0 | +! | i,j | integer | indexes | + +! Functions: +! | dnrm2 | double precision | Lapack function, compute the norm of a matrix | +! | disnan | logical | Lapack function, check if an element is NaN | + + + +subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) + + implicit none + + !BEGIN_DOC + ! Rotation matrix to rotate the molecular orbitals. + ! If the rotation is too large the transformation is not unitary and must be cancelled. + !END_DOC + + include 'pi.h' + + ! Variables + + ! in + integer, intent(in) :: n,LDA,LDR + double precision, intent(inout) :: A(LDA,n) + + ! out + double precision, intent(out) :: R(LDR,n) + integer, intent(out) :: info + logical, intent(out) :: enforce_step_cancellation + + ! internal + double precision, allocatable :: B(:,:) + double precision, allocatable :: work(:,:) + double precision, allocatable :: W(:,:), e_val(:) + double precision, allocatable :: m_diag(:,:),cos_tau(:,:),sin_tau(:,:),tau_m1(:,:) + double precision, allocatable :: part_1(:,:),part_1a(:,:) + double precision, allocatable :: part_2(:,:),part_2a(:,:),part_2b(:,:),part_2c(:,:) + double precision, allocatable :: RR_t(:,:) + integer :: i,j + integer :: info2, lwork ! for dsyev + double precision :: norm, max_elem, max_elem_A, t1,t2,t3 + + ! function + double precision :: dnrm2 + logical :: disnan + + print*,'' + print*,'---rotation_matrix---' + + call wall_time(t1) + + ! Allocation + allocate(B(n,n)) + allocate(m_diag(n,n),cos_tau(n,n),sin_tau(n,n),tau_m1(n,n)) + allocate(W(n,n),e_val(n)) + allocate(part_1(n,n),part_1a(n,n)) + allocate(part_2(n,n),part_2a(n,n),part_2b(n,n),part_2c(n,n)) + allocate(RR_t(n,n)) + +! Pre-conditions + +! Initialization +info=0 +enforce_step_cancellation = .False. + +! Size of matrix A must be at least 1 by 1 +if (n<1) then + info = 3 + print*, 'WARNING: invalid parameter 5' + print*, 'n<1' + return +endif + +! Leading dimension of A must be >= n +if (LDA < n) then + info = 25 + print*, 'WARNING: invalid parameter 2 or 5' + print*, 'LDA < n' + return +endif + +! Leading dimension of A must be >= n +if (LDR < n) then + info = 4 + print*, 'WARNING: invalid parameter 4' + print*, 'LDR < n' + return +endif + +! Matrix elements of A must by non-NaN +do j = 1, n + do i = 1, n + if (disnan(A(i,j))) then + info=1 + print*, 'WARNING: invalid parameter 1' + print*, 'NaN element in A matrix' + return + endif + enddo +enddo + +do i = 1, n + if (A(i,i) /= 0d0) then + print*, 'WARNING: matrix A is not antisymmetric' + print*, 'Non 0 element on the diagonal', i, A(i,i) + call ABORT + endif +enddo + +do j = 1, n + do i = 1, n + if (A(i,j)+A(j,i)>1d-16) then + print*, 'WANRING: matrix A is not antisymmetric' + print*, 'A(i,j) /= - A(j,i):', i,j,A(i,j), A(j,i) + print*, 'diff:', A(i,j)+A(j,i) + call ABORT + endif + enddo +enddo + +! Fix for too big elements ! bad idea better to cancel if the error is too big +!do j = 1, n +! do i = 1, n +! A(i,j) = mod(A(i,j),2d0*pi) +! if (dabs(A(i,j)) > pi) then +! A(i,j) = 0d0 +! endif +! enddo +!enddo + +max_elem_A = 0d0 +do j = 1, n + do i = 1, n + if (ABS(A(i,j)) > ABS(max_elem_A)) then + max_elem_A = A(i,j) + endif + enddo +enddo +!print*,'max element in A', max_elem_A + +if (ABS(max_elem_A) > 2 * pi) then + print*,'' + print*,'WARNING: ABS(max_elem_A) > 2 pi ' + print*,'' +endif + +! B=A.A +! - Calculation of the matrix $\textbf{B} = \textbf{A}^2$ +! - Diagonalization of $\textbf{B}$ +! W, the eigenvectors +! e_val, the eigenvalues + + +! Compute B=A.A + +call dgemm('N','N',n,n,n,1d0,A,size(A,1),A,size(A,1),0d0,B,size(B,1)) + +! Copy B in W, diagonalization will put the eigenvectors in W +W=B + +! Diagonalization of B +! Eigenvalues -> e_val +! Eigenvectors -> W +lwork = 3*n-1 +allocate(work(lwork,n)) + +!print*,'Starting diagonalization ...' + +call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) + +deallocate(work) + +if (info2 < 0) then + print*, 'WARNING: error in the diagonalization' + print*, 'Illegal value of the ', info2,'-th parameter' +elseif (info2 >0) then + print*, "WARNING: Diagonalization failed to converge" +endif + +! Tau^-1, cos(tau), sin(tau) +! $$\tau = \sqrt{-x}$$ +! - Calculation of $\cos(\tau)$ $\Leftrightarrow$ $\cos(\sqrt{-x})$ +! - Calculation of $\sin(\tau)$ $\Leftrightarrow$ $\sin(\sqrt{-x})$ +! - Calculation of $\tau^{-1}$ $\Leftrightarrow$ $(\sqrt{-x})^{-1}$ +! These matrices are diagonals + +! Diagonal matrix m_diag +do j = 1, n + if (e_val(j) >= -1d-12) then !0.d0) then !!! e_avl(i) must be < -1d-12 to avoid numerical problems + e_val(j) = 0.d0 + else + e_val(j) = - e_val(j) + endif +enddo + +m_diag = 0.d0 +do i = 1, n + m_diag(i,i) = e_val(i) +enddo + +! cos_tau +do j = 1, n + do i = 1, n + if (i==j) then + cos_tau(i,j) = dcos(dsqrt(e_val(i))) + else + cos_tau(i,j) = 0d0 + endif + enddo +enddo + +! sin_tau +do j = 1, n + do i = 1, n + if (i==j) then + sin_tau(i,j) = dsin(dsqrt(e_val(i))) + else + sin_tau(i,j) = 0d0 + endif + enddo +enddo + +! Debug, display the cos_tau and sin_tau matrix +!if (debug) then +! print*, 'cos_tau' +! do i = 1, n +! print*, cos_tau(i,:) +! enddo +! print*, 'sin_tau' +! do i = 1, n +! print*, sin_tau(i,:) +! enddo +!endif + +! tau^-1 +do j = 1, n + do i = 1, n + if ((i==j) .and. (e_val(i) > 1d-16)) then!0d0)) then !!! Convergence problem can come from here if the threshold is too big/small + tau_m1(i,j) = 1d0/(dsqrt(e_val(i))) + else + tau_m1(i,j) = 0d0 + endif + enddo +enddo + +max_elem = 0d0 +do i = 1, n + if (ABS(tau_m1(i,i)) > ABS(max_elem)) then + max_elem = tau_m1(i,i) + endif +enddo +!print*,'max elem tau^-1:', max_elem + +! Debug +!print*,'eigenvalues:' +!do i = 1, n +! print*, e_val(i) +!enddo + +!Debug, display tau^-1 +!if (debug) then +! print*, 'tau^-1' +! do i = 1, n +! print*,tau_m1(i,:) +! enddo +!endif + +! Rotation matrix +! \begin{align*} +! \textbf{R} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} + \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} +! \begin{align*} +! \textbf{Part1} = \textbf{W} \cos(\tau) \textbf{W}^{\dagger} +! \end{align*} +! \begin{align*} +! \textbf{Part2} = \textbf{W} \tau^{-1} \sin(\tau) \textbf{W}^{\dagger} \textbf{A} +! \end{align*} + +! First: +! part_1 = dgemm(W, dgemm(cos_tau, W^t)) +! part_1a = dgemm(cos_tau, W^t) +! part_1 = dgemm(W, part_1a) +! And: +! part_2= dgemm(W, dgemm(tau_m1, dgemm(sin_tau, dgemm(W^t, A)))) +! part_2a = dgemm(W^t, A) +! part_2b = dgemm(sin_tau, part_2a) +! part_2c = dgemm(tau_m1, part_2b) +! part_2 = dgemm(W, part_2c) +! Finally: +! Rotation matrix, R = part_1+part_2 + +! If $R$ is a rotation matrix: +! $R.R^T=R^T.R=\textbf{1}$ + +! part_1 +call dgemm('N','T',n,n,n,1d0,cos_tau,size(cos_tau,1),W,size(W,1),0d0,part_1a,size(part_1a,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_1a,size(part_1a,1),0d0,part_1,size(part_1,1)) + +! part_2 +call dgemm('T','N',n,n,n,1d0,W,size(W,1),A,size(A,1),0d0,part_2a,size(part_2a,1)) +call dgemm('N','N',n,n,n,1d0,sin_tau,size(sin_tau,1),part_2a,size(part_2a,1),0d0,part_2b,size(part_2b,1)) +call dgemm('N','N',n,n,n,1d0,tau_m1,size(tau_m1,1),part_2b,size(part_2b,1),0d0,part_2c,size(part_2c,1)) +call dgemm('N','N',n,n,n,1d0,W,size(W,1),part_2c,size(part_2c,1),0d0,part_2,size(part_2,1)) + +! Rotation matrix R +R = part_1 + part_2 + +! Matrix check +! R.R^t and R^t.R must be equal to identity matrix +do j = 1, n + do i=1,n + if (i==j) then + RR_t(i,j) = 1d0 + else + RR_t(i,j) = 0d0 + endif + enddo +enddo + +call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) + +norm = dnrm2(n*n,RR_t,1) +!print*, 'Rotation matrix check, norm R.R^T = ', norm + +! Debug +!if (debug) then +! print*, 'RR_t' +! do i = 1, n +! print*, RR_t(i,:) +! enddo +!endif + +! Post conditions + +! Check if R.R^T=1 +max_elem = 0d0 +do j = 1, n + do i = 1, n + if (ABS(RR_t(i,j)) > ABS(max_elem)) then + max_elem = RR_t(i,j) + endif + enddo +enddo + +print*, 'Max error in R.R^T:', max_elem +!print*, 'e_val(1):', e_val(1) +!print*, 'e_val(n):', e_val(n) +!print*, 'max elem in A:', max_elem_A + +if (ABS(max_elem) > 1d-12) then + print*, 'WARNING: max error in R.R^T > 1d-12' + print*, 'Enforce the step cancellation' + enforce_step_cancellation = .True. +endif + +! Matrix elements of R must by non-NaN +do j = 1,n + do i = 1,LDR + if (disnan(R(i,j))) then + info = 666 + print*, 'NaN in rotation matrix' + call ABORT + endif + enddo +enddo + +! Display +!if (debug) then +! print*,'Rotation matrix :' +! do i = 1, n +! write(*,'(100(F10.5))') R(i,:) +! enddo +!endif + +! Deallocation, end + +deallocate(B) + deallocate(m_diag,cos_tau,sin_tau,tau_m1) + deallocate(W,e_val) + deallocate(part_1,part_1a) + deallocate(part_2,part_2a,part_2b,part_2c) + deallocate(RR_t) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in rotation matrix:', t3 + + print*,'---End rotation_matrix---' + +end subroutine diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f new file mode 100644 index 00000000..f268df04 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -0,0 +1,134 @@ +! Rotation matrix with the iterative method + +! \begin{align*} +! \textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +! \end{align*} + +! !!! Doesn't work !!! + + +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end + +! Factorial + +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f new file mode 100644 index 00000000..75d04352 --- /dev/null +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -0,0 +1,64 @@ +! Rotation matrix in a subspace to rotation matrix in the full space + +! Usually, we are using a list of MOs, for exemple the active ones. When +! we compute a rotation matrix to rotate the MOs, we just compute a +! rotation matrix for these MOs in order to reduce the size of the +! matrix which has to be computed. Since the computation of a rotation +! matrix scale in $O(N^3)$ with $N$ the number of MOs, it's better to +! reuce the number of MOs involved. +! After that we replace the rotation matrix in the full space by +! building the elements of the rotation matrix in the full space from +! the elements of the rotation matrix in the subspace and adding some 0 +! on the extradiagonal elements and some 1 on the diagonal elements, +! for the MOs that are not involved in the rotation. + +! Provided: +! | mo_num | integer | Number of MOs | + +! Input: +! | m | integer | Size of tmp_list, m <= mo_num | +! | tmp_list(m) | integer | List of MOs | +! | tmp_R(m,m) | double precision | Rotation matrix in the space of | +! | | | the MOs containing by tmp_list | + +! Output: +! | R(mo_num,mo_num | double precision | Rotation matrix in the space | +! | | | of all the MOs | + +! Internal: +! | i,j | integer | indexes in the full space | +! | tmp_i,tmp_j | integer | indexes in the subspace | + + +subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) + + !BEGIN_DOC + ! Compute the full rotation matrix from a smaller one + !END_DOC + + implicit none + + ! in + integer, intent(in) :: m, tmp_list(m) + double precision, intent(in) :: tmp_R(m,m) + + ! out + double precision, intent(out) :: R(mo_num,mo_num) + + ! internal + integer :: i,j,tmp_i,tmp_j + + ! tmp_R to R, subspace to full space + R = 0d0 + do i = 1, mo_num + R(i,i) = 1d0 ! 1 on the diagonal because it is a rotation matrix, 1 = nothing change for the corresponding orbital + enddo + do tmp_j = 1, m + j = tmp_list(tmp_j) + do tmp_i = 1, m + i = tmp_list(tmp_i) + R(i,j) = tmp_R(tmp_i,tmp_j) + enddo + enddo + +end diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f new file mode 100644 index 00000000..ad5ad2f9 --- /dev/null +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -0,0 +1,126 @@ +! Predicted energy : e_model + +! *Compute the energy predicted by the Taylor series* + +! The energy is predicted using a Taylor expansion truncated at te 2nd +! order : + +! \begin{align*} +! E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \textbf{x}_{k+1}^T \cdot \textbf{H}_{k} \cdot \textbf{x}_{k+1} + \mathcal{O}(\textbf{x}_{k+1}^2) +! \end{align*} + +! Input: +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | + +! Output: +! | e_model | double precision | predicted energy after the rotation of the MOs | + +! Internal: +! | part_1 | double precision | v_grad^T.x | +! | part_2 | double precision | 1/2 . x^T.H.x | +! | part_2a | double precision | H.x | +! | i,j | integer | indexes | + +! Function: +! | ddot | double precision | dot product (Lapack) | + + +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the expected criterion/energy after the application of the step x + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) + double precision, intent(in) :: prev_energy + + ! out + double precision, intent(out) :: e_model + + ! internal + double precision :: part_1, part_2, t1,t2,t3 + double precision, allocatable :: part_2a(:) + + integer :: i,j + + !Function + double precision :: ddot + + print*,'' + print*,'---Trust_e_model---' + + call wall_time(t1) + + ! Allocation + allocate(part_2a(n)) + +! Calculations + +! part_1 corresponds to the product g.x +! part_2a corresponds to the product H.x +! part_2 corresponds to the product 0.5*(x^T.H.x) + +! TODO: remove the dot products + + +! Product v_grad.x + part_1 = ddot(n,v_grad,1,x,1) + + !if (debug) then + ! print*,'g.x : ', part_1 + !endif + + ! Product H.x + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif + + ! Product 1/2 . x^T.H.x + part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) + + !if (debug) then + ! print*,'1/2*x^T.H.x : ', part_2 + !endif + + + ! Sum + e_model = prev_energy + part_1 + part_2 + + ! Writing the predicted energy + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model + + ! Can be deleted, already in another subroutine + if (DABS(prev_energy - e_model) < 1d-12 ) then + print*,'WARNING: ABS(prev_energy - e_model) < 1d-12' + endif + + ! Deallocation + deallocate(part_2a) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust e model:', t3 + + print*,'---End trust_e_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f new file mode 100644 index 00000000..b7dcf875 --- /dev/null +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -0,0 +1,1651 @@ +! Newton's method to find the optimal lambda + +! *Compute the lambda value for the trust region* + +! This subroutine uses the Newton method in order to find the optimal +! lambda. This constant is added on the diagonal of the hessian to shift +! the eiganvalues. It has a double role: +! - ensure that the resulting hessian is positive definite for the +! Newton method +! - constrain the step in the trust region, i.e., +! $||\textbf{x}(\lambda)|| \leq \Delta$, where $\Delta$ is the radius +! of the trust region. +! We search $\lambda$ which minimizes +! \begin{align*} +! f(\lambda) = (||\textbf{x}_{(k+1)}(\lambda)||^2 -\Delta^2)^2 +! \end{align*} +! or +! \begin{align*} +! \tilde{f}(\lambda) = (\frac{1}{||\textbf{x}_{(k+1)}(\lambda)||^2}-\frac{1}{\Delta^2})^2 +! \end{align*} +! and gives obviously 0 in both cases. \newline + +! There are several cases: +! - If $\textbf{H}$ is positive definite the interval containing the +! solution is $\lambda \in (0, \infty)$ (and $-h_1 < 0$). +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$ then the interval containing +! the solution is $\lambda \in (-h_1, \infty)$. +! - If $\textbf{H}$ is indefinite ($h_1 < 0$) and $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ then the interval containing the solution is +! $\lambda \in (-h_1, \infty)$. The terms where $|h_i - \lambda| < +! 10^{-12}$ are not computed, so the term where $i = 1$ is +! automatically removed and this case becomes similar to the previous one. + +! So to avoid numerical problems (cf. trust_region) we start the +! algorithm at $\lambda=\max(0 + \epsilon,-h_1 + \epsilon)$, +! with $\epsilon$ a little constant. +! The research must be restricted to the interval containing the +! solution. For that reason a little trust region in 1D is used. + +! The Newton method to find the optimal $\lambda$ is : +! \begin{align*} +! \lambda_{(l+1)} &= \lambda_{(l)} - f^{''}(\lambda)_{(l)}^{-1} f^{'}(\lambda)_{(l)}^{} \\ +! \end{align*} +! $f^{'}(\lambda)_{(l)}$: the first derivative of $f$ with respect to +! $\lambda$ at the l-th iteration, +! $f^{''}(\lambda)_{(l)}$: the second derivative of $f$ with respect to +! $\lambda$ at the l-th iteration.\newline + +! Noting the Newton step $y = - f^{''}(\lambda)_{(l)}^{-1} +! f^{'}(\lambda)_{(l)}^{}$ we constrain $y$ such as +! \begin{align*} +! y \leq \alpha +! \end{align*} +! with $\alpha$ a scalar representing the trust length (trust region in +! 1D) where the function $f$ or $\tilde{f}$ is correctly describe by the +! Taylor series truncated at the second order. Thus, if $y > \alpha$, +! the constraint is applied as +! \begin{align*} +! y^* = \alpha \frac{y}{|y|} +! \end{align*} +! with $y^*$ the solution in the trust region. + +! The size of the trust region evolves in function of $\rho$ as for the +! trust region seen previously cf. trust_region, rho_model. +! The prediction of the value of $f$ or $\tilde{f}$ is done using the +! Taylor series truncated at the second order cf. "trust_region", +! "trust_e_model". + +! The first and second derivatives of $f(\lambda) = (||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(\sum_{i=1}^n \frac{-2(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! The first and second derivatives of $\tilde{f}(\lambda) = (1/||\textbf{x}(\lambda)||^2 - +! 1/\Delta^2)^2$ with respect to $\lambda$ are: +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2} +! {(h_i + \lambda)^3)})^2}{(\sum_ {i=1}^n\frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}} +! {(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! Provided in qp_edit: +! | thresh_rho_2 | +! | thresh_cc | +! | nb_it_max_lambda | +! | version_lambda_search | +! | nb_it_max_pre_search | +! see qp_edit for more details + +! Input: +! | n | integer | m*(m-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | tmp_wtg(n) | double precision | w_i^T.v_grad(i) | +! | delta | double precision | delta for the trust region | + +! Output: +! | lambda | double precision | Lagrange multiplier to constrain the norm of the size of the Newton step | +! | | | lambda > 0 | + +! Internal: +! | d1_N | double precision | value of d1_norm_trust_region | +! | d2_N | double precision | value of d2_norm_trust_region | +! | f_N | double precision | value of f_norm_trust_region | +! | prev_f_N | double precision | previous value of f_norm_trust_region | +! | f_R | double precision | (norm(x)^2 - delta^2)^2 or (1/norm(x)^2 - 1/delta^2)^2 | +! | prev_f_R | double precision | previous value of f_R | +! | model | double precision | predicted value of f_R from prev_f_R and y | +! | d_1 | double precision | value of the first derivative | +! | d_2 | double precision | value of the second derivative | +! | y | double precision | Newton's step, y = -f''^-1 . f' = lambda - prev_lambda | +! | prev_lambda | double precision | previous value of lambda | +! | t1,t2,t3 | double precision | wall time | +! | i | integer | index | +! | epsilon | double precision | little constant to avoid numerical problem | +! | rho_2 | double precision | (prev_f_R - f_R)/(prev_f_R - model), agreement between model and f_R | +! | version | integer | version of the root finding method | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d2_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | d1_norm_inverse_trust_region | double precision | first derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | d2_norm_inverse_trust_region | double precision | second derivative with respect to lambda of (1/norm(x)^2 - 1/Delta^2)^2 | +! | f_norm_trust_region | double precision | value of norm(x)^2 | + + + +subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Research the optimal lambda to constrain the step size in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(inout) :: e_val(n) + double precision, intent(in) :: delta + double precision, intent(in) :: tmp_wtg(n) + + ! out + double precision, intent(out) :: lambda + + ! Internal + double precision :: d1_N, d2_N, f_N, prev_f_N + double precision :: prev_f_R, f_R + double precision :: model + double precision :: d_1, d_2 + double precision :: t1,t2,t3 + integer :: i + double precision :: epsilon + double precision :: y + double precision :: prev_lambda + double precision :: rho_2 + double precision :: alpha + integer :: version + + ! Functions + double precision :: d1_norm_trust_region,d1_norm_trust_region_omp + double precision :: d2_norm_trust_region, d2_norm_trust_region_omp + double precision :: f_norm_trust_region, f_norm_trust_region_omp + double precision :: d1_norm_inverse_trust_region + double precision :: d2_norm_inverse_trust_region + double precision :: d1_norm_inverse_trust_region_omp + double precision :: d2_norm_inverse_trust_region_omp + + print*,'' + print*,'---Trust_newton---' + + call wall_time(t1) + + ! version_lambda_search + ! 1 -> ||x||^2 - delta^2 = 0, + ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif + ! Version 2 is normally better + + + +! Resolution with the Newton method: + + +! Initialization + epsilon = 1d-4 + lambda = max(0d0, -e_val(1)) + + ! Pre research of lambda to start near the optimal lambda + ! by adding a constant epsilon and changing the constant to + ! have ||x(lambda + epsilon)|| ~ delta, before setting + ! lambda = lambda + epsilon + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + i = 1 + + ! To increase lambda + if (f_N > delta**2) then + !print*,'Increasing lambda...' + do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 2d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N < f_N) then + print*,'WARNING, error: prev_f_N < f_N, exit' + epsilon = epsilon * 0.5d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + + ! To reduce lambda + else + !print*,'Reducing lambda...' + do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) + + ! Update the previous norm + prev_f_N = f_N + ! New epsilon + epsilon = epsilon * 0.5d0 + ! New norm + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) + + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + + ! Security + if (prev_f_N > f_N) then + print*,'WARNING, error: prev_f_N > f_N, exit' + epsilon = epsilon * 2d0 + i = nb_it_max_pre_search + 1 + endif + + i = i + 1 + enddo + endif + + !print*,'End of the pre research of lambda' + + ! New value of lambda + lambda = lambda + epsilon + + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda + + ! thresh_cc, threshold for the research of the optimal lambda + ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc + ! thresh_rho_2, threshold to cancel the step in the research + ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) + + ! Debug + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' + + ! Initialization + i = 1 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) ! Value of the ||x(lambda)||^2 + model = 0d0 ! predicted value of (||x||^2 - delta^2)^2 + prev_f_N = 0d0 ! previous value of ||x||^2 + prev_f_R = 0d0 ! previous value of (||x||^2 - delta^2)^2 + f_R = 0d0 ! value of (||x||^2 - delta^2)^2 + rho_2 = 0d0 ! (prev_f_R - f_R)/(prev_f_R - m) + y = 0d0 ! step size + prev_lambda = 0d0 ! previous lambda + + ! Derivatives + if (version_lambda_search == 1) then + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + + ! Trust length + alpha = DABS((1d0/d_2)*d_1) + + ! Newton's method + do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' + + ! Update of f_N, f_R and the derivatives + prev_f_N = f_N + if (version_lambda_search == 1) then + prev_f_R = (prev_f_N - delta**2)**2 + d_1 = d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (||x(lambda)||^2 - delta^2)^2 + d_2 = d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (||x(lambda)||^2 - delta^2)^2 + else + prev_f_R = (1d0/prev_f_N - 1d0/delta**2)**2 + d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 + endif + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + + ! Newton's step + y = -(1d0/DABS(d_2))*d_1 + + ! Constraint on y (the newton step) + if (DABS(y) > alpha) then + y = alpha * (y/DABS(y)) ! preservation of the sign of y + endif + !write(*,'(a,E12.5)') ' Step length: ', y + + ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series + model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 + + ! Updates lambda + prev_lambda = lambda + lambda = prev_lambda + y + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda + + ! Checks if lambda is in (-h_1, \infty) + if (lambda > MAX(0d0, -e_val(1))) then + ! New value of ||x(lambda)||^2 + f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + ! New f_R + if (version_lambda_search == 1) then + f_R = (f_N - delta**2)**2 ! new value of (||x(lambda)||^2 - delta^2)^2 + else + f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 + endif + + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif + + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model + + ! Check the gain + if (DABS(prev_f_R - model) < thresh_model_2) then + print*,'' + print*,'WARNING: ABS(previous - model) <', thresh_model_2, 'rho_2 will tend toward infinity' + print*,'' + endif + + ! Will be deleted + !if (prev_f_R - f_R <= 1d-16 .or. prev_f_R - model <= 1d-16) then + ! print*,'' + ! print*,'WARNING: ABS(previous - model) <= 1d-16, exit' + ! print*,'' + ! exit + !endif + + ! Computes rho_2 + rho_2 = (prev_f_R - f_R)/(prev_f_R - model) + !print*,'rho_2:', rho_2 + else + rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) + !print*,'lambda < -e_val(1) ===> rho_2 = 0' + endif + + ! Evolution of the trust length, alpha + if (rho_2 >= 0.75d0) then + alpha = 2d0 * alpha + elseif (rho_2 >= 0.5d0) then + alpha = alpha + elseif (rho_2 >= 0.25d0) then + alpha = 0.5d0 * alpha + else + alpha = 0.25d0 * alpha + endif + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + + ! cancellaion of the step if rho < 0.1 + if (rho_2 < thresh_rho_2) then !0.1d0) then + lambda = prev_lambda + f_N = prev_f_N + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + endif + + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' + + i = i + 1 + enddo + + ! if trust newton failed + if (i > nb_it_max_lambda) then + print*,'' + print*,'######################################################' + print*,'WARNING: i >', nb_it_max_lambda,'for the trust Newton' + print*,'The research of the optimal lambda has failed' + print*,'######################################################' + print*,'' + endif + + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + + ! Time + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in trust_newton:', t3 + + print*,'---End trust_newton---' + +end subroutine + +! OMP: First derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the first derivative of (||x||^2 - Delta^2)^2* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = -4 \left(\sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | mo_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg,accu1,accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1,accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_trust_region_omp = -4d0 * accu2 * (accu1 - delta**2) + + deallocate(tmp_accu1, tmp_accu2) + +end function + +! OMP: Second derivative of (||x||^2 - Delta^2)^2 + +! *Function to compute the second derivative of (||x||^2 - Delta^2)^2* + +! This function computes the second derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. +! \begin{align*} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_accu2 | double precision | temporary array for the third sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | + + +function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region_omp + double precision :: ddot + + ! Internal + double precision :: accu1,accu2,accu3 + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + integer :: i, j + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + + ! accu1 + !$OMP DO + do i = 1, n + if (ABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + ! accu2 + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + ! accu3 + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + d2_norm_trust_region_omp = 2d0 * (6d0 * accu3 * (- delta**2 + accu1) + (-2d0 * accu2)**2) + + deallocate(tmp_accu1, tmp_accu2, tmp_accu3) + +end function + +! OMP: Function value of ||x||^2 + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | + +! Internal: +! | tmp_wtg(n) | double precision | temporary array for W^T.v_grad | +! | tmp_fN | double precision | temporary array for the function | +! | i,j | integer | indexes | + + +function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) + + use omp_lib + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! functions + double precision :: f_norm_trust_region_omp + + ! internal + double precision, allocatable :: tmp_fN(:) + integer :: i,j + + ! Allocation + allocate(tmp_fN(n)) + + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_fN, tmp_wtg, f_norm_trust_region_omp) & + !$OMP DEFAULT(NONE) + + ! Initialization + + !$OMP MASTER + f_norm_trust_region_omp = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_fN(i) = 0d0 + enddo + !$OMP END DO + + ! Calculations + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_fN(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + f_norm_trust_region_omp = f_norm_trust_region_omp + tmp_fN(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + deallocate(tmp_fN) + +end function + +! First derivative of (||x||^2 - Delta^2)^2 +! Version without omp + +! *Function to compute the first derivative of ||x||^2 - Delta* + +! This function computes the first derivative of (||x||^2 - Delta^2)^2 +! with respect to lambda. + +! \begin{align*} +! \frac{\partial }{\partial \lambda} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left(-2\sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right) +! \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i+ \lambda)^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_trust_region | double precision | first derivative with respect to lambda of (norm(x)^2 - Delta^2)^2 | +! | ddot | double precision | blas dot product | + + +function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i, j + + ! Functions + double precision :: d1_norm_trust_region + double precision :: ddot + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + wtg = 0d0 + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_trust_region = 2d0 * accu2 * (accu1 - delta**2) + +end function + +! Second derivative of (||x||^2 - Delta^2)^2 +! Version without OMP + +! *Function to compute the second derivative of ||x||^2 - Delta* + + +! \begin{equation} +! \frac{\partial^2 }{\partial \lambda^2} (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 +! = 2 \left[ \left( \sum_{i=1}^n 6 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} \right) \left( - \Delta^2 + \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \right) + \left( \sum_{i=1}^n -2 \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \right)^2 \right] +! \end{equation} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | accu1 | double precision | first sum of the formula | +! | accu2 | double precision | second sum of the formula | +! | accu3 | double precision | third sum of the formula | +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_trust_region | double precision | second derivative with respect to lambda of norm(x)^2 - Delta^2 | +! | ddot | double precision | blas dot product | + + +function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Functions + double precision :: d2_norm_trust_region + double precision :: ddot + + ! Internal + double precision :: wtg,accu1,accu2,accu3 + integer :: i, j + + ! Initialization + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 !4 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu2 = accu2 - 2d0 * wtg**2 / (e_val(i) + lambda)**3 !2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + !wtg = ddot(n,w(:,i),1,v_grad,1) + accu3 = accu3 + 6d0 * wtg**2 / (e_val(i) + lambda)**4 !3 + endif + enddo + + d2_norm_trust_region = 2d0 * (accu3 * (- delta**2 + accu1) + accu2**2) + +end function + +! Function value of ||x||^2 +! Version without OMP + +! *Compute the value of ||x||^2* + +! This function computes the value of ||x(lambda)||^2 + +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | f_norm_trust_region | double precision | value of norm(x)^2 | +! | ddot | double precision | blas dot product | + + + +function f_norm_trust_region(n,e_val,tmp_wtg,lambda) + + include 'pi.h' + + !BEGIN_DOC + ! Compute ||x(lambda)||^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + + ! function + double precision :: f_norm_trust_region + double precision :: ddot + + ! internal + integer :: i,j + + ! Initialization + f_norm_trust_region = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + f_norm_trust_region = f_norm_trust_region + tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + +end function + +! OMP: First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:) + + ! Functions + double precision :: d1_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_wtg, accu1, accu2) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + +! !$OMP MASTER +! do i = 1, n +! if (ABS(e_val(i)+lambda) > 1d-12) then +! tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 +! endif +! enddo +! !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d1_norm_inverse_trust_region_omp = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + + deallocate(tmp_accu1, tmp_accu2) + +end + +! OMP: Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version with OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | tmp_accu1 | double precision | temporary array for the first sum | +! | tmp_accu2 | double precision | temporary array for the second sum | +! | tmp_wtg(n) | double precision | temporary array for W^t.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) + + use omp_lib + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: tmp_wtg(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: accu1, accu2, accu3 + integer :: i,j + double precision, allocatable :: tmp_accu1(:), tmp_accu2(:), tmp_accu3(:) + + ! Functions + double precision :: d2_norm_inverse_trust_region_omp + + ! Allocation + allocate(tmp_accu1(n), tmp_accu2(n), tmp_accu3(n)) + + ! OMP + call omp_set_max_active_levels(1) + + ! OMP + !$OMP PARALLEL & + !$OMP PRIVATE(i,j) & + !$OMP SHARED(n,lambda, e_val, thresh_eig,& + !$OMP tmp_accu1, tmp_accu2, tmp_accu3, tmp_wtg, & + !$OMP accu1, accu2, accu3) & + !$OMP DEFAULT(NONE) + + !$OMP MASTER + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + !$OMP END MASTER + + !$OMP DO + do i = 1, n + tmp_accu1(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu2(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + tmp_accu3(i) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu1(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**2 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu1 = accu1 + tmp_accu1(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu2(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**3 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu2 = accu2 + tmp_accu2(i) + enddo + !$OMP END MASTER + + !$OMP DO + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + tmp_accu3(i) = tmp_wtg(i)**2 / (e_val(i) + lambda)**4 + endif + enddo + !$OMP END DO + + !$OMP MASTER + do i = 1, n + accu3 = accu3 + tmp_accu3(i) + enddo + !$OMP END MASTER + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + d2_norm_inverse_trust_region_omp = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + + deallocate(tmp_accu1,tmp_accu2,tmp_accu3) + +end + +! First derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial}{\partial \lambda} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{4}{\Delta^2} \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)}} +! {(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \\ +! &= 4 \sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3} +! \left( \frac{1}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - \frac{1}{\Delta^2 (\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right) +! \end{align*} +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} +! \end{align*} +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d1_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2 + integer :: i,j + + ! Functions + double precision :: d1_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + d1_norm_inverse_trust_region = 4d0 * accu2 * (1d0/accu1**3 - 1d0/(delta**2 * accu1**2)) + +end + +! Second derivative of (1/||x||^2 - 1/Delta^2)^2 +! Version without OMP + +! *Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2* + +! This function computes the value of (1/||x(lambda)||^2 - 1/Delta^2)^2 + +! \begin{align*} +! \frac{\partial^2}{\partial \lambda^2} (1/||\textbf{x}(\lambda)||^2 - 1/\Delta^2)^2 +! &= 4 \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^4} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} \right] \\ +! &- \frac{4}{\Delta^2} \left[ \frac{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^3)})^2}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^3} +! - 3 \frac{\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^4}}{(\sum_i \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2})^2} \right] +! \end{align*} + +! \begin{align*} +! \text{accu1} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^2} \\ +! \text{accu2} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^3} \\ +! \text{accu3} &= \sum_{i=1}^n \frac{(\textbf{w}_i^T \textbf{g})^2}{(h_i + \lambda)^4} +! \end{align*} + +! Provided: +! | m_num | integer | number of MOs | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n,n) | double precision | eigenvectors of the hessian | +! | v_grad(n) | double precision | gradient | +! | lambda | double precision | Lagrange multiplier | +! | delta | double precision | Delta of the trust region | + +! Internal: +! | wtg | double precision | temporary variable to store W^T.v_grad | +! | i,j | integer | indexes | + +! Function: +! | d2_norm_inverse_trust_region | double precision | value of the first derivative | + + +function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: e_val(n) + double precision, intent(in) :: w(n,n) + double precision, intent(in) :: v_grad(n) + double precision, intent(in) :: lambda + double precision, intent(in) :: delta + + ! Internal + double precision :: wtg, accu1, accu2, accu3 + integer :: i,j + + ! Functions + double precision :: d2_norm_inverse_trust_region + + accu1 = 0d0 + accu2 = 0d0 + accu3 = 0d0 + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu1 = accu1 + wtg**2 / (e_val(i) + lambda)**2 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu2 = accu2 + wtg**2 / (e_val(i) + lambda)**3 + endif + enddo + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + wtg = 0d0 + do j = 1, n + wtg = wtg + w(j,i) * v_grad(j) + enddo + accu3 = accu3 + wtg**2 / (e_val(i) + lambda)**4 + endif + enddo + + d2_norm_inverse_trust_region = 4d0 * (6d0 * accu2**2/accu1**4 - 3d0 * accu3/accu1**3) & + - 4d0/delta**2 * (4d0 * accu2**2/accu1**3 - 3d0 * accu3/accu1**2) + +end diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f new file mode 100644 index 00000000..11ab11e9 --- /dev/null +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -0,0 +1,120 @@ +! Agreement with the model: Rho + +! *Compute the ratio : rho = (prev_energy - energy) / (prev_energy - e_model)* + +! Rho represents the agreement between the model (the predicted energy +! by the Taylor expansion truncated at the 2nd order) and the real +! energy : + +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} +! With : +! $E^{k}$ the energy at the previous iteration +! $E^{k+1}$ the energy at the actual iteration +! $m^{k+1}$ the predicted energy for the actual iteration +! (cf. trust_e_model) + +! If $\rho \approx 1$, the agreement is good, contrary to $\rho \approx 0$. +! If $\rho \leq 0$ the previous energy is lower than the actual +! energy. We have to cancel the last step and use a smaller trust +! region. +! Here we cancel the last step if $\rho < 0.1$, because even if +! the energy decreases, the agreement is bad, i.e., the Taylor expansion +! truncated at the second order doesn't represent correctly the energy +! landscape. So it's better to cancel the step and restart with a +! smaller trust region. + +! Provided in qp_edit: +! | thresh_rho | + +! Input: +! | prev_energy | double precision | previous energy (energy before the rotation) | +! | e_model | double precision | predicted energy after the rotation | + +! Output: +! | rho | double precision | the agreement between the model (predicted) and the real energy | +! | prev_energy | double precision | if rho >= 0.1 the actual energy becomes the previous energy | +! | | | else the previous energy doesn't change | + +! Internal: +! | energy | double precision | energy (real) after the rotation | +! | i | integer | index | +! | t* | double precision | time | + + +subroutine trust_region_rho(prev_energy, energy,e_model,rho) + + include 'pi.h' + + !BEGIN_DOC + ! Compute rho, the agreement between the predicted criterion/energy and the real one + !END_DOC + + implicit none + + ! Variables + + ! In + double precision, intent(inout) :: prev_energy + double precision, intent(in) :: e_model, energy + + ! Out + double precision, intent(out) :: rho + + ! Internal + double precision :: t1, t2, t3 + integer :: i + + print*,'' + print*,'---Rho_model---' + + !call wall_time(t1) + +! Rho +! \begin{equation} +! \rho^{k+1} = \frac{E^{k} - E^{k+1}}{E^{k} - m^{k+1}} +! \end{equation} + +! In function of $\rho$ th step can be accepted or cancelled. + +! If we cancel the last step (k+1), the previous energy (k) doesn't +! change! +! If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) + + +! Already done in an other subroutine + !if (ABS(prev_energy - e_model) < 1d-12) then + ! print*,'WARNING: prev_energy - e_model < 1d-12' + ! print*,'=> rho will tend toward infinity' + ! print*,'Check you convergence criterion !' + !endif + + rho = (prev_energy - energy) / (prev_energy - e_model) + + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho + + ! Modification of prev_energy in function of rho + if (rho < thresh_rho) then !0.1) then + ! the step is cancelled + print*, 'Rho <', thresh_rho,', the previous energy does not changed' + !print*, 'prev_energy :', prev_energy + else + ! the step is accepted + prev_energy = energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy + endif + + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 + + print*,'---End rho_model---' + +end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f new file mode 100644 index 00000000..54161a1c --- /dev/null +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -0,0 +1,749 @@ +! Trust region + +! *Compute the next step with the trust region algorithm* + +! The Newton method is an iterative method to find a minimum of a given +! function. It uses a Taylor series truncated at the second order of the +! targeted function and gives its minimizer. The minimizer is taken as +! the new position and the same thing is done. And by doing so +! iteratively the method find a minimum, a local or global one depending +! of the starting point and the convexity/nonconvexity of the targeted +! function. + +! The goal of the trust region is to constrain the step size of the +! Newton method in a certain area around the actual position, where the +! Taylor series is a good approximation of the targeted function. This +! area is called the "trust region". + +! In addition, in function of the agreement between the Taylor +! development of the energy and the real energy, the size of the trust +! region will be updated at each iteration. By doing so, the step sizes +! are not too larges. In addition, since we add a criterion to cancel the +! step if the energy increases (more precisely if rho < 0.1), so it's +! impossible to diverge. \newline + +! References: \newline +! Nocedal & Wright, Numerical Optimization, chapter 4 (1999), \newline +! https://link.springer.com/book/10.1007/978-0-387-40065-5, \newline +! ISBN: 978-0-387-40065-5 \newline + +! By using the first and the second derivatives, the Newton method gives +! a step: +! \begin{align*} +! \textbf{x}_{(k+1)}^{\text{Newton}} = - \textbf{H}_{(k)}^{-1} \cdot +! \textbf{g}_{(k)} +! \end{align*} +! which leads to the minimizer of the Taylor series. +! !!! Warning: the Newton method gives the minimizer if and only if +! $\textbf{H}$ is positive definite, else it leads to a saddle point !!! +! But we want a step $\textbf{x}_{(k+1)}$ with a constraint on its (euclidian) norm: +! \begin{align*} +! ||\textbf{x}_{(k+1)}|| \leq \Delta_{(k+1)} +! \end{align*} +! which is equivalent to +! \begin{align*} +! \textbf{x}_{(k+1)}^T \cdot \textbf{x}_{(k+1)} \leq \Delta_{(k+1)}^2 +! \end{align*} + +! with: \newline +! $\textbf{x}_{(k+1)}$ is the step for the k+1-th iteration (vector of +! size n) \newline +! $\textbf{H}_{(k)}$ is the hessian at the k-th iteration (n by n +! matrix) \newline +! $\textbf{g}_{(k)}$ is the gradient at the k-th iteration (vector of +! size n) \newline +! $\Delta_{(k+1)}$ is the trust radius for the (k+1)-th iteration +! \newline + +! Thus we want to constrain the step size $\textbf{x}_{(k+1)}$ into a +! hypersphere of radius $\Delta_{(k+1)}$.\newline + +! So, if $||\textbf{x}_{(k+1)}^{\text{Newton}}|| \leq \Delta_{(k)}$ and +! $\textbf{H}$ is positive definite, the +! solution is the step given by the Newton method +! $\textbf{x}_{(k+1)} = \textbf{x}_{(k+1)}^{\text{Newton}}$. +! Else we have to constrain the step size. For simplicity we will remove +! the index $_{(k)}$ and $_{(k+1)}$. To restict the step size, we have +! to put a constraint on $\textbf{x}$ with a Lagrange multiplier. +! Starting from the Taylor series of a function E (here, the energy) +! truncated at the 2nd order, we have: +! \begin{align*} +! E(\textbf{x}) = E +\textbf{g}^T \cdot \textbf{x} + \frac{1}{2} +! \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} + +! \mathcal{O}(\textbf{x}^2) +! \end{align*} + +! With the constraint on the norm of $\textbf{x}$ we can write the +! Lagrangian +! \begin{align*} +! \mathcal{L}(\textbf{x},\lambda) = E + \textbf{g}^T \cdot \textbf{x} +! + \frac{1}{2} \cdot \textbf{x}^T \cdot \textbf{H} \cdot \textbf{x} +! + \frac{1}{2} \lambda (\textbf{x}^T \cdot \textbf{x} - \Delta^2) +! \end{align*} +! Where: \newline +! $\lambda$ is the Lagrange multiplier \newline +! $E$ is the energy at the k-th iteration $\Leftrightarrow +! E(\textbf{x} = \textbf{0})$ \newline + +! To solve this equation, we search a stationary point where the first +! derivative of $\mathcal{L}$ with respect to $\textbf{x}$ becomes 0, i.e. +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}}=0 +! \end{align*} + +! The derivative is: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! \end{align*} + +! So, we search $\textbf{x}$ such as: +! \begin{align*} +! \frac{\partial \mathcal{L}(\textbf{x},\lambda)}{\partial \textbf{x}} +! = \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} = 0 +! \end{align*} + +! We can rewrite that as: +! \begin{align*} +! \textbf{g} + \textbf{H} \cdot \textbf{x} + \lambda \cdot \textbf{x} +! = \textbf{g} + (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x} = 0 +! \end{align*} +! with $\textbf{I}$ is the identity matrix. + +! By doing so, the solution is: +! \begin{align*} +! (\textbf{H} +\textbf{I} \lambda) \cdot \textbf{x}= -\textbf{g} +! \end{align*} +! \begin{align*} +! \textbf{x}= - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} +! with $\textbf{x}^T \textbf{x} = \Delta^2$. + +! We have to solve this previous equation to find this $\textbf{x}$ in the +! trust region, i.e. $||\textbf{x}|| = \Delta$. Now, this problem is +! just a one dimension problem because we can express $\textbf{x}$ as a +! function of $\lambda$: +! \begin{align*} +! \textbf{x}(\lambda) = - (\textbf{H} + \textbf{I} \lambda)^{-1} \cdot \textbf{g} +! \end{align*} + +! We start from the fact that the hessian is diagonalizable. So we have: +! \begin{align*} +! \textbf{H} = \textbf{W} \cdot \textbf{h} \cdot \textbf{W}^T +! \end{align*} +! with: \newline +! $\textbf{H}$, the hessian matrix \newline +! $\textbf{W}$, the matrix containing the eigenvectors \newline +! $\textbf{w}_i$, the i-th eigenvector, i.e. i-th column of $\textbf{W}$ \newline +! $\textbf{h}$, the matrix containing the eigenvalues in ascending order \newline +! $h_i$, the i-th eigenvalue in ascending order \newline + +! Now we use the fact that adding a constant on the diagonal just shifts +! the eigenvalues: +! \begin{align*} +! \textbf{H} + \textbf{I} \lambda = \textbf{W} \cdot (\textbf{h} +! +\textbf{I} \lambda) \cdot \textbf{W}^T +! \end{align*} + +! By doing so we can express $\textbf{x}$ as a function of $\lambda$ +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! with $\lambda \neq - h_i$. + +! An interesting thing in our case is the norm of $\textbf{x}$, +! because we want $||\textbf{x}|| = \Delta$. Due to the orthogonality of +! the eigenvectors $\left\{\textbf{w} \right\} _{i=1}^n$ we have: +! \begin{align*} +! ||\textbf{x}(\lambda)||^2 = \sum_{i=1}^n \frac{(\textbf{w}_i^T \cdot +! \textbf{g})^2}{(h_i + \lambda)^2} +! \end{align*} + +! So the $||\textbf{x}(\lambda)||^2$ is just a function of $\lambda$. +! And if we study the properties of this function we see that: +! \begin{align*} +! \lim_{\lambda\to\infty} ||\textbf{x}(\lambda)|| = 0 +! \end{align*} +! and if $\textbf{w}_i^T \cdot \textbf{g} \neq 0$: +! \begin{align*} +! \lim_{\lambda\to -h_i} ||\textbf{x}(\lambda)|| = + \infty +! \end{align*} + +! From these limits and knowing that $h_1$ is the lowest eigenvalue, we +! can conclude that $||\textbf{x}(\lambda)||$ is a continuous and +! strictly decreasing function on the interval $\lambda \in +! (-h_1;\infty)$. Thus, there is one $\lambda$ in this interval which +! gives $||\textbf{x}(\lambda)|| = \Delta$, consequently there is one +! solution. + +! Since $\textbf{x} = - (\textbf{H} + \lambda \textbf{I})^{-1} \cdot +! \textbf{g}$ and we want to reduce the norm of $\textbf{x}$, clearly, +! $\lambda > 0$ ($\lambda = 0$ is the unconstraint solution). But the +! Newton method is only defined for a positive definite hessian matrix, +! so $(\textbf{H} + \textbf{I} \lambda)$ must be positive +! definite. Consequently, in the case where $\textbf{H}$ is not positive +! definite, to ensure the positive definiteness, $\lambda$ must be +! greater than $- h_1$. +! \begin{align*} +! \lambda > 0 \quad \text{and} \quad \lambda \geq - h_1 +! \end{align*} + +! From that there are five cases: +! - if $\textbf{H}$ is positive definite, $-h_1 < 0$, $\lambda \in (0,\infty)$ +! - if $\textbf{H}$ is not positive definite and $\textbf{w}_1^T \cdot +! \textbf{g} \neq 0$, $(\textbf{H} + \textbf{I} +! \lambda)$ +! must be positve definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty)$ +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| > \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda \in (-h_1, \infty$) +! - if $\textbf{H}$ is not positive definite , $\textbf{w}_1^T \cdot +! \textbf{g} = 0$ and $||\textbf{x}(-h_1)|| \leq \Delta$ by removing +! $j=1$ in the sum, $(\textbf{H} + \textbf{I} \lambda)$ must be +! positive definite, $-h_1 > 0$, $\lambda = -h_1$). This case is +! similar to the case where $\textbf{H}$ and $||\textbf{x}(\lambda = +! 0)|| \leq \Delta$ +! but we can also add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point + +! Thus to find the solution, we can write: +! \begin{align*} +! ||\textbf{x}(\lambda)|| = \Delta +! \end{align*} +! \begin{align*} +! ||\textbf{x}(\lambda)|| - \Delta = 0 +! \end{align*} + +! Taking the square of this equation +! \begin{align*} +! (||\textbf{x}(\lambda)|| - \Delta)^2 = 0 +! \end{align*} +! we have a function with one minimum for the optimal $\lambda$. +! Since we have the formula of $||\textbf{x}(\lambda)||^2$, we solve +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} + +! But in practice, it is more effective to solve: +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} + +! To do that, we just use the Newton method with "trust_newton" using +! first and second derivative of $(||\textbf{x}(\lambda)||^2 - +! \Delta^2)^2$ with respect to $\textbf{x}$. +! This will give the optimal $\lambda$ to compute the +! solution $\textbf{x}$ with the formula seen previously: +! \begin{align*} +! \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} + +! The solution $\textbf{x}(\lambda)$ with the optimal $\lambda$ is our +! step to go from the (k)-th to the (k+1)-th iteration, is noted $\textbf{x}^*$. + + + + +! Evolution of the trust region + +! We initialize the trust region at the first iteration using a radius +! \begin{align*} +! \Delta = ||\textbf{x}(\lambda=0)|| +! \end{align*} + +! And for the next iteration the trust region will evolves depending of +! the agreement of the energy prediction based on the Taylor series +! truncated at the 2nd order and the real energy. If the Taylor series +! truncated at the 2nd order represents correctly the energy landscape +! the trust region will be extent else it will be reduced. In order to +! mesure this agreement we use the ratio rho cf. "rho_model" and +! "trust_e_model". From that we use the following values: +! - if $\rho \geq 0.75$, then $\Delta = 2 \Delta$, +! - if $0.5 \geq \rho < 0.75$, then $\Delta = \Delta$, +! - if $0.25 \geq \rho < 0.5$, then $\Delta = 0.5 \Delta$, +! - if $\rho < 0.25$, then $\Delta = 0.25 \Delta$. + +! In addition, if $\rho < 0.1$ the iteration is cancelled, so it +! restarts with a smaller trust region until the energy decreases. + + + + +! Summary + +! To summarize, knowing the hessian (eigenvectors and eigenvalues), the +! gradient and the radius of the trust region we can compute the norm of +! the Newton step +! \begin{align*} +! ||\textbf{x}(\lambda = 0)||^2 = ||- \textbf{H}^{-1} \cdot \textbf{g}||^2 = \sum_{i=1}^n +! \frac{(\textbf{w}_i^T \cdot \textbf{g})^2}{(h_i + \lambda)^2}, \quad h_i \neq 0 +! \end{align*} + +! - if $h_1 \geq 0$, $||\textbf{x}(\lambda = 0)|| \leq \Delta$ and +! $\textbf{x}(\lambda=0)$ is in the trust region and it is not +! necessary to put a constraint on $\textbf{x}$, the solution is the +! unconstrained one, $\textbf{x}^* = \textbf{x}(\lambda = 0)$. +! - else if $h_1 < 0$, $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| \leq \Delta$ (by removing $j=1$ in +! the sum), the solution is $\textbf{x}^* = \textbf{x}(\lambda = +! -h_1)$, similarly to the previous case. +! But we can add to $\textbf{x}$, the first eigenvector $\textbf{W}_1$ +! time a constant to ensure the condition $||\textbf{x}(\lambda = +! -h_1)|| = \Delta$ and escape from the saddle point +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} \neq 0$ we +! have to search $\lambda \in (-h_1, \infty)$ such as +! $\textbf{x}(\lambda) = \Delta$ by solving with the Newton method +! \begin{align*} +! (||\textbf{x}(\lambda)||^2 - \Delta^2)^2 = 0 +! \end{align*} +! or +! \begin{align*} +! (\frac{1}{||\textbf{x}(\lambda)||^2} - \frac{1}{\Delta^2})^2 = 0 +! \end{align*} +! which is numerically more stable. And finally compute +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot +! \textbf{g}}{h_i + \lambda} \cdot \textbf{w}_i +! \end{align*} +! - else if $h_1 \geq 0$ and $||\textbf{x}(\lambda = 0)|| > \Delta$ we +! do exactly the same thing that the previous case but we search +! $\lambda \in (0, \infty)$ +! - else if $h_1 < 0$ and $\textbf{w}_1^T \cdot \textbf{g} = 0$ and +! $||\textbf{x}(\lambda = -h_1)|| > \Delta$ (by removing $j=1$ in the +! sum), again we do exactly the same thing that the previous case +! searching $\lambda \in (-h_1, \infty)$. + + +! For the cases where $\textbf{w}_1^T \cdot \textbf{g} = 0$ it is not +! necessary in fact to remove the $j = 1$ in the sum since the term +! where $h_i - \lambda < 10^{-6}$ are not computed. + +! After that, we take this vector $\textbf{x}^*$, called "x", and we do +! the transformation to an antisymmetric matrix $\textbf{X}$, called +! m_x. This matrix $\textbf{X}$ will be used to compute a rotation +! matrix $\textbf{R}= \exp(\textbf{X})$ in "rotation_matrix". + +! NB: +! An improvement can be done using a elleptical trust region. + + + + +! Code + +! Provided: +! | mo_num | integer | number of MOs | + +! Cf. qp_edit in orbital optimization section, for some constants/thresholds + +! Input: +! | m | integer | number of MOs | +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | H(n,n2) | double precision | hessian | +! | v_grad(n) | double precision | gradient | +! | e_val(n) | double precision | eigenvalues of the hessian | +! | W(n, n) | double precision | eigenvectors of the hessian | +! | rho | double precision | agreement between the model and the reality, | +! | | | represents the quality of the energy prediction | +! | nb_iter | integer | number of iteration | + +! Input/Ouput: +! | delta | double precision | radius of the trust region | + +! Output: +! | x(n) | double precision | vector containing the step | + +! Internal: +! | accu | double precision | temporary variable to compute the step | +! | lambda | double precision | lagrange multiplier | +! | trust_radius2 | double precision | square of the radius of the trust region | +! | norm2_x | double precision | norm^2 of the vector x | +! | norm2_g | double precision | norm^2 of the vector containing the gradient | +! | tmp_wtg(n) | double precision | tmp_wtg(i) = w_i^T . g | +! | i, j, k | integer | indexes | + +! Function: +! | dnrm2 | double precision | Blas function computing the norm | +! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | + + +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) + + include 'pi.h' + + !BEGIN_DOC + ! Compuet the step in the trust region + !END_DOC + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n), rho + integer, intent(inout) :: nb_iter + double precision, intent(in) :: e_val(n), w(n,n2) + + ! inout + double precision, intent(inout) :: delta + + ! out + double precision, intent(out) :: x(n) + + ! Internal + double precision :: accu, lambda, trust_radius2 + double precision :: norm2_x, norm2_g + double precision, allocatable :: tmp_wtg(:) + integer :: i,j,k + double precision :: t1,t2,t3 + integer :: n_neg_eval + + + ! Functions + double precision :: ddot, dnrm2 + double precision :: f_norm_trust_region_omp + + print*,'' + print*,'==================' + print*,'---Trust_region---' + print*,'==================' + + call wall_time(t1) + + ! Allocation + allocate(tmp_wtg(n)) + +! Initialization and norm + +! The norm of the step size will be useful for the trust region +! algorithm. We start from a first guess and the radius of the trust +! region will evolve during the optimization. + +! avoid_saddle is actually a test to avoid saddle points + + +! Initialization of the Lagrange multiplier +lambda = 0d0 + +! List of w^T.g, to avoid the recomputation +tmp_wtg = 0d0 +if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo + enddo +else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo +endif + +! Replacement of the small tmp_wtg corresponding to a negative eigenvalue +! in the case of avoid_saddle +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + i = 2 + ! Number of negative eigenvalues + do while (e_val(i) < - thresh_eig) + if (tmp_wtg(i) < thresh_wtg2) then + if (version_avoid_saddle == 1) then + tmp_wtg(i) = 1d0 + elseif (version_avoid_saddle == 2) then + tmp_wtg(i) = DABS(e_val(i)) + elseif (version_avoid_saddle == 3) then + tmp_wtg(i) = dsqrt(DABS(e_val(i))) + else + tmp_wtg(i) = thresh_wtg2 + endif + endif + i = i + 1 + enddo + + ! For the fist one it's a little bit different + if (tmp_wtg(1) < thresh_wtg2) then + tmp_wtg(1) = 0d0 + endif + +endif + +! Norm^2 of x, ||x||^2 +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) +! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta +! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm +! Anyway if the step is too big it will be reduced +!print*,'||x||^2 :', norm2_x + +! Norm^2 of the gradient, ||v_grad||^2 +norm2_g = (dnrm2(n,v_grad,1))**2 +!print*,'||grad||^2 :', norm2_g + +! Trust radius initialization + +! At the first iteration (nb_iter = 0) we initialize the trust region +! with the norm of the step generate by the Newton's method ($\textbf{x}_1 = +! (\textbf{H}_0)^{-1} \cdot \textbf{g}_0$, +! we compute this norm using f_norm_trust_region_omp as explain just +! below) + + +! trust radius +if (nb_iter == 0) then + trust_radius2 = norm2_x + ! To avoid infinite loop of cancellation of this first step + ! without changing delta + nb_iter = 1 + + ! Compute delta, delta = sqrt(trust_radius) + delta = dsqrt(trust_radius2) +endif + +! Modification of the trust radius + +! In function of rho (which represents the agreement between the model +! and the reality, cf. rho_model) the trust region evolves. We update +! delta (the radius of the trust region). + +! To avoid too big trust region we put a maximum size. + + +! Modification of the trust radius in function of rho +if (rho >= 0.75d0) then + delta = 2d0 * delta +elseif (rho >= 0.5d0) then + delta = delta +elseif (rho >= 0.25d0) then + delta = 0.5d0 * delta +else + delta = 0.25d0 * delta +endif + +! Maximum size of the trust region +!if (delta > 0.5d0 * n * pi) then +! delta = 0.5d0 * n * pi +! print*,'Delta > delta_max, delta = 0.5d0 * n * pi' +!endif + +if (delta > 1d10) then + delta = 1d10 +endif + +!print*, 'Delta :', delta + +! Calculation of the optimal lambda + +! We search the solution of $(||x||^2 - \Delta^2)^2 = 0$ +! - If $||\textbf{x}|| > \Delta$ or $h_1 < 0$ we have to add a constant +! $\lambda > 0 \quad \text{and} \quad \lambda > -h_1$ +! - If $||\textbf{x}|| \leq \Delta$ and $h_1 \geq 0$ the solution is the +! unconstrained one, $\lambda = 0$ + +! You will find more details at the beginning + + +! By giving delta, we search (||x||^2 - delta^2)^2 = 0 +! and not (||x||^2 - delta)^2 = 0 + +! Research of lambda to solve ||x(lambda)|| = Delta + +! Display +!print*, 'e_val(1) = ', e_val(1) +!print*, 'w_1^T.g =', tmp_wtg(1) + +! H positive definite +if (e_val(1) > - thresh_eig) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta + + ! H positive definite, ||x(lambda = 0)|| <= Delta + if (dsqrt(norm2_x) <= delta) then + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' + lambda = 0d0 + + ! H positive definite, ||x(lambda = 0)|| > Delta + else + ! Constraint solution + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +! H indefinite +else + if (DABS(tmp_wtg(1)) < thresh_wtg) then + norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + endif + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta + if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then + ! Add e_val(1) in order to have (H - e_val(1) I) positive definite + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' + lambda = - e_val(1) + + ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta + ! and + ! H indefinite, w_1^T.g =/= 0 + else + ! Constraint solution/ add lambda + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' + call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) + endif + +endif + +! Recomputation of the norm^2 of the step x +norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) +print*,'' +print*,'Summary after the trust region:' +print*,'lambda:', lambda +print*,'||x||:', dsqrt(norm2_x) +print*,'delta:', delta + +! Calculation of the step x + +! x refers to $\textbf{x}^*$ +! We compute x in function of lambda using its formula : +! \begin{align*} +! \textbf{x}^* = \textbf{x}(\lambda) = - \sum_{i=1}^n \frac{\textbf{w}_i^T \cdot \textbf{g}}{h_i +! + \lambda} \cdot \textbf{w}_i +! \end{align*} + + +! Initialisation +x = 0d0 + +! Calculation of the step x + +if (n == n2) then + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif +else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif +endif + +double precision :: beta, norm_x + +! Test +! If w_1^T.g = 0, the lim of ||x(lambda)|| when lambda tend to -e_val(1) +! is not + infinity. So ||x(lambda=-e_val(1))|| < delta, we add the first +! eigenvectors multiply by a constant to ensure the condition +! ||x(lambda=-e_val(1))|| = delta and escape the saddle point +if (avoid_saddle .and. e_val(1) < - thresh_eig) then + if (tmp_wtg(1) < 1d-15 .and. (1d0 - dsqrt(norm2_x)/delta) > 1d-3 ) then + + ! norm of x + norm_x = dnrm2(n,x,1) + + ! Computes the coefficient for the w_1 + beta = delta**2 - norm_x**2 + + ! Updates the step x + x = x + W(:,1) * dsqrt(beta) + + ! Recomputes the norm to check + norm_x = dnrm2(n,x,1) + + print*, 'Add w_1 * dsqrt(delta^2 - ||x||^2):' + print*, '||x||', norm_x + endif +endif + +! Transformation of x + +! x is a vector of size n, so it can be write as a m by m +! antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". + + +! ! Step transformation vector -> matrix +! ! Vector with n element -> mo_num by mo_num matrix +! do j = 1, m +! do i = 1, m +! if (i>j) then +! call mat_to_vec_index(i,j,k) +! m_x(i,j) = x(k) +! else +! m_x(i,j) = 0d0 +! endif +! enddo +! enddo +! +! ! Antisymmetrization of the previous matrix +! do j = 1, m +! do i = 1, m +! if (i lower diagonal matrix (p,q), p > q + +! If a matrix is antisymmetric it can be reshaped as a vector. And the +! vector can be reshaped as an antisymmetric matrix + +! \begin{align*} +! \begin{pmatrix} +! 0 & -1 & -2 & -4 \\ +! 1 & 0 & -3 & -5 \\ +! 2 & 3 & 0 & -6 \\ +! 4 & 5 & 6 & 0 +! \end{pmatrix} +! \Leftrightarrow +! \begin{pmatrix} +! 1 & 2 & 3 & 4 & 5 & 6 +! \end{pmatrix} +! \end{align*} + +! !!! Here the algorithm only work for the lower diagonal !!! + +! Input: +! | i | integer | index in the vector | + +! Ouput: +! | p,q | integer | corresponding indexes in the lower diagonal of a matrix | +! | | | p > q, | +! | | | p -> row, | +! | | | q -> column | + + +subroutine vec_to_mat_index(i,p,q) + + include 'pi.h' + + !BEGIN_DOC + ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing + ! its index i a vector + !END_DOC + + implicit none + + ! Variables + + ! in + integer,intent(in) :: i + + ! out + integer, intent(out) :: p,q + + ! internal + integer :: a,b + double precision :: da + + da = 0.5d0*(1+ sqrt(1d0+8d0*DBLE(i))) + a = INT(da) + if ((a*(a-1))/2==i) then + p = a-1 + else + p = a + endif + b = p*(p-1)/2 + + ! Matrix element indexes + p = p + 1 + q = i - b + +end subroutine diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f new file mode 100644 index 00000000..e184d3ba --- /dev/null +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -0,0 +1,39 @@ +! Vect to antisymmetric matrix using mat_to_vec_index + +! Vector to antisymmetric matrix transformation using mat_to_vec_index +! subroutine. + +! Can be done in OMP (for the first part and with omp critical for the second) + + +subroutine vec_to_mat_v2(n,m,v_x,m_x) + + !BEGIN_DOC + ! Vector to antisymmetric matrix + !END_DOC + + implicit none + + integer, intent(in) :: n,m + double precision, intent(in) :: v_x(n) + double precision, intent(out) :: m_x(m,m) + + integer :: i,j,k + + ! 1D -> 2D lower diagonal + m_x = 0d0 + do j = 1, m - 1 + do i = j + 1, m + call mat_to_vec_index(i,j,k) + m_x(i,j) = v_x(k) + enddo + enddo + + ! Antisym + do i = 1, m - 1 + do j = i + 1, m + m_x(i,j) = - m_x(j,i) + enddo + enddo + +end From d6f7ec60f8e140982a24b282a36d027397d68d93 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:22:46 +0200 Subject: [PATCH 042/337] add mo localization --- src/mo_localization/84.mo_localization.bats | 97 + src/mo_localization/EZFIO.cfg | 54 + src/mo_localization/NEED | 3 + src/mo_localization/README.md | 113 + src/mo_localization/break_spatial_sym.irp.f | 27 + src/mo_localization/debug_gradient_loc.irp.f | 65 + src/mo_localization/debug_hessian_loc.irp.f | 65 + src/mo_localization/kick_the_mos.irp.f | 16 + src/mo_localization/localization.irp.f | 520 +++ src/mo_localization/localization_sub.irp.f | 2008 ++++++++++++ src/mo_localization/org/TANGLE_org_mode.sh | 7 + src/mo_localization/org/break_spatial_sym.org | 28 + .../org/debug_gradient_loc.org | 67 + src/mo_localization/org/debug_hessian_loc.org | 67 + src/mo_localization/org/kick_the_mos.org | 18 + src/mo_localization/org/localization.org | 2899 +++++++++++++++++ 16 files changed, 6054 insertions(+) create mode 100644 src/mo_localization/84.mo_localization.bats create mode 100644 src/mo_localization/EZFIO.cfg create mode 100644 src/mo_localization/NEED create mode 100644 src/mo_localization/README.md create mode 100644 src/mo_localization/break_spatial_sym.irp.f create mode 100644 src/mo_localization/debug_gradient_loc.irp.f create mode 100644 src/mo_localization/debug_hessian_loc.irp.f create mode 100644 src/mo_localization/kick_the_mos.irp.f create mode 100644 src/mo_localization/localization.irp.f create mode 100644 src/mo_localization/localization_sub.irp.f create mode 100755 src/mo_localization/org/TANGLE_org_mode.sh create mode 100644 src/mo_localization/org/break_spatial_sym.org create mode 100644 src/mo_localization/org/debug_gradient_loc.org create mode 100644 src/mo_localization/org/debug_hessian_loc.org create mode 100644 src/mo_localization/org/kick_the_mos.org create mode 100644 src/mo_localization/org/localization.org diff --git a/src/mo_localization/84.mo_localization.bats b/src/mo_localization/84.mo_localization.bats new file mode 100644 index 00000000..b34c0bd5 --- /dev/null +++ b/src/mo_localization/84.mo_localization.bats @@ -0,0 +1,97 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + +zero () { + if [ -z "$1" ]; then echo 0.0; else echo $1; fi +} + +function run() { + thresh1=1e-10 + thresh2=1e-12 + thresh3=1e-4 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -d + qp set_frozen_core + qp set localization localization_method boys + file="$(echo $1 | sed 's/.ezfio//g')" + energy="$(cat $1/hartree_fock/energy)" + fb_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + fb_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + fb_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + fb_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + fb_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + fb_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + fb_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + qp reset -a + qp run scf + qp set_frozen_core + qp set localization localization_method pipek + pm_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + pm_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + pm_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + pm_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + pm_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + pm_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + pm_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + qp set localization localization_method boys + qp reset -a + qp run scf + qp set_frozen_core + eq $energy $fb_energy $thresh1 + eq $fb_err1 0.0 $thresh2 + eq $fb_err2 0.0 $thresh2 + eq $energy $pm_energy $thresh1 + eq $pm_err1 0.0 $thresh2 + eq $pm_err2 0.0 $thresh2 + fb_c=$(zero $fb_c) + fb_i=$(zero $fb_i) + fb_a=$(zero $fb_a) + fb_v=$(zero $fb_v) + pm_c=$(zero $pm_c) + pm_i=$(zero $pm_i) + pm_a=$(zero $pm_a) + pm_v=$(zero $pm_v) + eq $fb_c $2 $thresh3 + eq $fb_i $3 $thresh3 + eq $fb_a $4 $thresh3 + eq $fb_v $5 $thresh3 + eq $pm_c $6 $thresh3 + eq $pm_i $7 $thresh3 + eq $pm_a $8 $thresh3 + eq $pm_v $9 $thresh3 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -32.1357551678876 -47.0041982094667 0.0 -223.470015856259 -1.99990778964451 -2.51376723927071 0.0 -12.8490602539275 +} + +@test "clo" { +run clo.ezfio -44.1624001765291 -32.4386660941387 0.0 -103.666309287187 -5.99985418946811 -5.46871580225222 0.0 -20.2480064922275 +} + +@test "clf" { +run clf.ezfio -47.5143398826967 -35.7206886315104 0.0 -107.043029033468 -5.99994222062230 -6.63916513458470 0.0 -19.7035159913484 +} + +@test "h2o2" { +run h2o2.ezfio -7.76848143170524 -30.9694344369829 0.0 -175.898343829453 -1.99990497554575 -5.62980322957485 0.0 -33.5699813186666 +} + +@test "h2o" { +run h2o.ezfio 0.0 -2.52317434969591 0.0 -45.3136377925359 0.0 -3.01248365356981 0.0 -22.4470831240924 +} + +@test "h3coh" { +run h3coh.ezfio -3.66763692804590 -24.0463089480870 0.0 -111.485948435075 -1.99714061342078 -4.89242181322988 0.0 -23.6405412057679 +} + +@test "n2h4" { +run n2h4.ezfio -7.46608163002070 -35.7632174051822 0.0 -305.913449004632 -1.99989326143356 -4.62496615892268 0.0 -51.5171904685553 +} + diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..d1b844a5 --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys: Foster-Boys, pipek: Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[auto_mo_class] +type: logical +doc: If true, set automatically the classes. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..b438f39d --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,3 @@ +hartree_fock +utils_trust_region +determinants diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..c28a5ee1 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,113 @@ +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +By default, the different otbital classes are automatically set by splitting +the orbitales in the following classes: +- Core -> Core +- Active, doubly occupied -> Inactive +- Active, singly occupied -> Active +- Active, empty -> Virtual +- Deleted -> Deleted +The orbitals will be localized among each class, excpect the deleted ones. +If you want to choose another splitting, you can set +``` +qp set mo_localization auto_mo_class false +``` +and define the classes with +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +for more information +``` +qp set_mo_class -q +``` +We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. +If you are using the last option don't forget to reset the initial mo classes +after the localization. + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. +This feature can be removed by setting: +``` +qp set localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +This program work exactly as the localization. +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Parameters +Some other parameters are available for the localization (qp edit for more details). + +# Tests +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..2048aca6 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,27 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..d935e782 --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,65 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..3ee4f0fa --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,65 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..b6c77c9e --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,16 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..7ccb2f5a --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,520 @@ +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +! Loc + +! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..f5afed07 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2008 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end diff --git a/src/mo_localization/org/TANGLE_org_mode.sh b/src/mo_localization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/org/break_spatial_sym.org b/src/mo_localization/org/break_spatial_sym.org new file mode 100644 index 00000000..d82f1c60 --- /dev/null +++ b/src/mo_localization/org/break_spatial_sym.org @@ -0,0 +1,28 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_gradient_loc.org b/src/mo_localization/org/debug_gradient_loc.org new file mode 100644 index 00000000..6d147dd0 --- /dev/null +++ b/src/mo_localization/org/debug_gradient_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_hessian_loc.org b/src/mo_localization/org/debug_hessian_loc.org new file mode 100644 index 00000000..e47cf38d --- /dev/null +++ b/src/mo_localization/org/debug_hessian_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/org/kick_the_mos.org b/src/mo_localization/org/kick_the_mos.org new file mode 100644 index 00000000..c0c6c02d --- /dev/null +++ b/src/mo_localization/org/kick_the_mos.org @@ -0,0 +1,18 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/localization.org b/src/mo_localization/org/localization.org new file mode 100644 index 00000000..aaf9f18d --- /dev/null +++ b/src/mo_localization/org/localization.org @@ -0,0 +1,2899 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +Foster-Boys localization: +- cite Foster +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Noting +$$A_{ii} = < \phi_i | r^2 | \phi_i > $$ +$$B_{ii} = < \phi_i | r | \phi_i > $$ + +$$ \beta = (B_{pp} - B_{qq})^2 - 4 B_{pq}^2 $$ +$$ \gamma = 4 B_{pq} (B_{pp} - B_{qq}) $$ + +\begin{align*} +C_{FB}(\theta) &= \sum_{i=1}^N \left[ A_{ii} - B_{ii}^2 \right] \\ +&- \left[ A_{pp} - B_{pp}^2 + A_{qq} - B_{qq}^2 \right] \\ +&+ \left[ A_{pp} + A_{qq} - B_{pp}^2 - B_{qq}^2 ++ \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \right] \\ +&= C_1(\theta=0) + \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_{FB}(\theta)}{\partial \theta} = \beta \sin(4\theta) + \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2} = 4 \beta \cos(4\theta) - 4 \gamma \sin(4 \theta) +\end{align*} + +Similarly: +\begin{align*} +C_3(\theta) &= \sum_{i=1}^N [B_{ii}^2] \\ +&- B_{pp}^2 - B_{qq}^2 \\ +&+ B_{pp}^2 + B_{qq}^2 - \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \\ +&= C_3(\theta=0) - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_3(\theta)}{\partial \theta} = - \beta \sin(4\theta) - \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_3(\theta)}{\partial \theta^2} = - 4 \beta \cos(4\theta) + 4 \gamma \sin(4 \theta) +\end{align*} + +And since we compute the derivatives around $\theta = 0$ (around the +actual position) we have: +\begin{align*} +\left. \frac{\partial{C_{FB}(\theta)}}{\partial \theta}\right|_{\theta=0} = \gamma +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta +\end{align*} + +Locality of the orbitals: +- cite Hoyvik +As the Foster-Boys method tries to minimize the sum of the second +moment MO spread, the locality of each MO can be expressed as the +second moment of the MO spread. For the MO i, the locality criterion is +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +-cite pipek mezey 1989 +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +The optimum functional $\mathcal{P}$ is obtained for the maximum of +$D^{-1}$ +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} + +As for the Foster Boys localization, the change in the functional for +the rotation of two MOs can be obtained using very similar terms +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} +The matrix element of the operator $P_A$ are obtained using +\begin{align*} +<\rho | \tilde{\mu}> = \delta_{\rho \mu} +\end{align*} +which leads to +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +So similarly the first and second derivatives are + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end +#+END_SRC From b71888f459d7407d7589f001a4d25418dde9df63 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 18 Apr 2023 13:56:30 +0200 Subject: [PATCH 043/337] add mo optimization --- src/mo_optimization/83.mo_optimization.bats | 62 + src/mo_optimization/EZFIO.cfg | 29 + src/mo_optimization/NEED | 7 + src/mo_optimization/README.md | 74 + src/mo_optimization/class.irp.f | 12 + src/mo_optimization/constants.h | 1 + .../debug_gradient_list_opt.irp.f | 78 + src/mo_optimization/debug_gradient_opt.irp.f | 76 + .../debug_hessian_list_opt.irp.f | 147 ++ src/mo_optimization/debug_hessian_opt.irp.f | 171 ++ .../diagonal_hessian_list_opt.irp.f | 1556 ++++++++++++++++ .../diagonal_hessian_opt.irp.f | 1511 ++++++++++++++++ .../diagonalization_hessian.irp.f | 136 ++ .../first_diagonal_hessian_list_opt.irp.f | 372 ++++ .../first_diagonal_hessian_opt.irp.f | 344 ++++ .../first_gradient_list_opt.irp.f | 125 ++ src/mo_optimization/first_gradient_opt.irp.f | 128 ++ .../first_hessian_list_opt.irp.f | 365 ++++ src/mo_optimization/first_hessian_opt.irp.f | 360 ++++ src/mo_optimization/gradient_list_opt.irp.f | 381 ++++ src/mo_optimization/gradient_opt.irp.f | 346 ++++ src/mo_optimization/hessian_list_opt.irp.f | 1129 ++++++++++++ src/mo_optimization/hessian_opt.irp.f | 1043 +++++++++++ src/mo_optimization/my_providers.irp.f | 141 ++ src/mo_optimization/orb_opt.irp.f | 22 + src/mo_optimization/org/TANGLE_org_mode.sh | 7 + src/mo_optimization/org/TODO.org | 17 + .../org/debug_gradient_list_opt.org | 79 + .../org/debug_gradient_opt.org | 77 + .../org/debug_hessian_list_opt.org | 148 ++ src/mo_optimization/org/debug_hessian_opt.org | 172 ++ .../org/diagonal_hessian_list_opt.org | 1561 +++++++++++++++++ .../org/diagonal_hessian_opt.org | 1516 ++++++++++++++++ .../org/diagonalization_hessian.org | 138 ++ .../org/first_diagonal_hessian_list_opt.org | 376 ++++ .../org/first_diagonal_hessian_opt.org | 348 ++++ .../org/first_gradient_list_opt.org | 127 ++ .../org/first_gradient_opt.org | 130 ++ .../org/first_hessian_list_opt.org | 370 ++++ src/mo_optimization/org/first_hessian_opt.org | 365 ++++ src/mo_optimization/org/gradient_list_opt.org | 393 +++++ src/mo_optimization/org/gradient_opt.org | 358 ++++ src/mo_optimization/org/hessian_list_opt.org | 1141 ++++++++++++ src/mo_optimization/org/hessian_opt.org | 1056 +++++++++++ src/mo_optimization/org/my_providers.org | 308 ++++ src/mo_optimization/org/optimization.org | 91 + src/mo_optimization/org/orb_opt_trust_v2.org | 349 ++++ .../org/state_average_energy.org | 73 + .../org/state_weight_normalization.org | 31 + src/mo_optimization/org/update_parameters.org | 16 + .../org/update_st_av_ci_energy.org | 26 + .../run_orb_opt_trust_v2.irp.f | 317 ++++ src/mo_optimization/save_energy.irp.f | 9 + .../state_average_energy.irp.f | 72 + .../state_weight_normalization.irp.f | 29 + src/mo_optimization/update_parameters.irp.f | 15 + .../update_st_av_ci_energy.irp.f | 25 + 57 files changed, 18356 insertions(+) create mode 100644 src/mo_optimization/83.mo_optimization.bats create mode 100644 src/mo_optimization/EZFIO.cfg create mode 100644 src/mo_optimization/NEED create mode 100644 src/mo_optimization/README.md create mode 100644 src/mo_optimization/class.irp.f create mode 100644 src/mo_optimization/constants.h create mode 100644 src/mo_optimization/debug_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/debug_gradient_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/debug_hessian_opt.irp.f create mode 100644 src/mo_optimization/diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/diagonalization_hessian.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_diagonal_hessian_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_list_opt.irp.f create mode 100644 src/mo_optimization/first_gradient_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_list_opt.irp.f create mode 100644 src/mo_optimization/first_hessian_opt.irp.f create mode 100644 src/mo_optimization/gradient_list_opt.irp.f create mode 100644 src/mo_optimization/gradient_opt.irp.f create mode 100644 src/mo_optimization/hessian_list_opt.irp.f create mode 100644 src/mo_optimization/hessian_opt.irp.f create mode 100644 src/mo_optimization/my_providers.irp.f create mode 100644 src/mo_optimization/orb_opt.irp.f create mode 100755 src/mo_optimization/org/TANGLE_org_mode.sh create mode 100644 src/mo_optimization/org/TODO.org create mode 100644 src/mo_optimization/org/debug_gradient_list_opt.org create mode 100644 src/mo_optimization/org/debug_gradient_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_list_opt.org create mode 100644 src/mo_optimization/org/debug_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/diagonalization_hessian.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_diagonal_hessian_opt.org create mode 100644 src/mo_optimization/org/first_gradient_list_opt.org create mode 100644 src/mo_optimization/org/first_gradient_opt.org create mode 100644 src/mo_optimization/org/first_hessian_list_opt.org create mode 100644 src/mo_optimization/org/first_hessian_opt.org create mode 100644 src/mo_optimization/org/gradient_list_opt.org create mode 100644 src/mo_optimization/org/gradient_opt.org create mode 100644 src/mo_optimization/org/hessian_list_opt.org create mode 100644 src/mo_optimization/org/hessian_opt.org create mode 100644 src/mo_optimization/org/my_providers.org create mode 100644 src/mo_optimization/org/optimization.org create mode 100644 src/mo_optimization/org/orb_opt_trust_v2.org create mode 100644 src/mo_optimization/org/state_average_energy.org create mode 100644 src/mo_optimization/org/state_weight_normalization.org create mode 100644 src/mo_optimization/org/update_parameters.org create mode 100644 src/mo_optimization/org/update_st_av_ci_energy.org create mode 100644 src/mo_optimization/run_orb_opt_trust_v2.irp.f create mode 100644 src/mo_optimization/save_energy.irp.f create mode 100644 src/mo_optimization/state_average_energy.irp.f create mode 100644 src/mo_optimization/state_weight_normalization.irp.f create mode 100644 src/mo_optimization/update_parameters.irp.f create mode 100644 src/mo_optimization/update_st_av_ci_energy.irp.f diff --git a/src/mo_optimization/83.mo_optimization.bats b/src/mo_optimization/83.mo_optimization.bats new file mode 100644 index 00000000..5bc3d313 --- /dev/null +++ b/src/mo_optimization/83.mo_optimization.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh=2e-3 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -a + qp run scf + qp set_frozen_core + qp set determinants n_states 2 + qp set determinants read_wf true + qp set mo_two_e_ints io_mo_two_e_integrals None + file="$(echo $1 | sed 's/.ezfio//g')" + qp run cis + qp run debug_gradient_list_opt > $file.debug_g.out + err3="$(grep 'Max error:' $file.debug_g.out | awk '{print $3}')" + qp run debug_hessian_list_opt > $file.debug_h1.out + err1="$(grep 'Max error:' $file.debug_h1.out | awk '{print $3}')" + qp run orb_opt > $file.opt1.out + energy1="$(grep 'State average energy:' $file.opt1.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method diag + qp reset -d + qp run scf + qp run cis + qp run debug_hessian_list_opt > $file.debug_h2.out + err2="$(grep 'Max error_H:' $file.debug_h2.out | awk '{print $3}')" + qp run orb_opt > $file.opt2.out + energy2="$(grep 'State average energy:' $file.opt2.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method full + qp reset -d + qp run scf + eq $energy1 $2 $thresh + eq $energy2 $3 $thresh + eq $err1 0.0 1e-12 + eq $err2 0.0 1e-12 + eq $err3 0.0 1e-12 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -48.9852901484277 -48.9852937541510 +} + +@test "h2o" { +run h2o.ezfio -75.9025622449206 -75.8691844585879 +} + +@test "h2s" { +run h2s.ezfio -398.576255809878 -398.574145943928 +} + +@test "hbo" { +run hbo.ezfio -99.9234823022109 -99.9234763597840 +} + +@test "hco" { +run hco.ezfio -113.204915552241 -113.204905207050 +} diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg new file mode 100644 index 00000000..8944e507 --- /dev/null +++ b/src/mo_optimization/EZFIO.cfg @@ -0,0 +1,29 @@ +[optimization_method] +type: character*(32) +doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian +interface: ezfio,provider,ocaml +default: full + +[n_det_start] +type: integer +doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. The algorithm does a cipsi until n_det > n_det_start and the optimization starts after +interface: ezfio,provider,ocaml +default: 5 + +[n_det_max_opt] +type: integer +doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) +interface: ezfio,provider,ocaml +default: 200000 + +[optimization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital optimization +interface: ezfio,provider,ocaml +default: 20 + +[thresh_opt_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the optimization exits when the biggest element in the gradient is smaller than thresh_optimization_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-5 diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED new file mode 100644 index 00000000..91f41ee3 --- /dev/null +++ b/src/mo_optimization/NEED @@ -0,0 +1,7 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +selectors_full +generators_full +utils_trust_region diff --git a/src/mo_optimization/README.md b/src/mo_optimization/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization/class.irp.f new file mode 100644 index 00000000..b4a68ac2 --- /dev/null +++ b/src/mo_optimization/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the FCI case, all those are always false + END_DOC + do_only_1h1p = .False. + do_only_cas = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization/constants.h new file mode 100644 index 00000000..1cd00bda --- /dev/null +++ b/src/mo_optimization/constants.h @@ -0,0 +1 @@ + logical, parameter :: debug=.False. diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f new file mode 100644 index 00000000..867e0105 --- /dev/null +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -0,0 +1,78 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f new file mode 100644 index 00000000..8aeec18f --- /dev/null +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -0,0 +1,76 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f new file mode 100644 index 00000000..d1aa79c4 --- /dev/null +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -0,0 +1,147 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | +! | optimization_method | string | Method for the orbital optimization: | +! | | | - 'full' -> full hessian | +! | | | - 'diag' -> diagonal hessian | +! | dim_list_act_orb | integer | number of active MOs | +! | list_act(dim_list_act_orb) | integer | list of the actives MOs | +! | | | | + +! Internal: +! | m | integer | number of MOs in the list | +! | | | (active MOs) | +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f new file mode 100644 index 00000000..6d22cc01 --- /dev/null +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -0,0 +1,171 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization/diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..fe54fa7a --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_list_opt.irp.f @@ -0,0 +1,1556 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) + +! Initialization of the arrays + +!!$OMP DO +!do tmp_s = 1,m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3, tmp_2rdm_3) + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do tmp_p = 1, m +! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_s = 1, m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m + +! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & +! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do tmp_pq = 1, n +! write(*,'(100(F10.5))') H(tmp_pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization/diagonal_hessian_opt.irp.f new file mode 100644 index 00000000..7688ec37 --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_opt.irp.f @@ -0,0 +1,1511 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) + +! Initialization of the arrays + +!!$OMP DO +!do s = 1,mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,r,s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_2rdm_3,tmp_bi_int_3) +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do p = 1, mo_num +! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do s = 1, mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num + +! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do pq = 1, n +! write(*,'(100(F10.5))') H(pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization/diagonalization_hessian.irp.f new file mode 100644 index 00000000..e25879d9 --- /dev/null +++ b/src/mo_optimization/diagonalization_hessian.irp.f @@ -0,0 +1,136 @@ +! Diagonalization of the hessian + +! Just a matrix diagonalization using Lapack + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | H(n,n) | double precision | hessian | + +! Output: +! | e_val(n) | double precision | eigenvalues of the hessian | +! | w(n,n) | double precision | eigenvectors of the hessian | + +! Internal: +! | nb_negative_nv | integer | number of negative eigenvalues | +! | lwork | integer | for Lapack | +! | work(lwork,n) | double precision | temporary array for Lapack | +! | info | integer | if 0 -> ok, else problem in the diagonalization | +! | i,j | integer | dummy indexes | + + +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..58536993 --- /dev/null +++ b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f @@ -0,0 +1,372 @@ +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization/gradient_opt.irp.f new file mode 100644 index 00000000..25be6b5a --- /dev/null +++ b/src/mo_optimization/gradient_opt.irp.f @@ -0,0 +1,346 @@ +! Gradient + +! The gradient of the CI energy with respects to the orbital rotation +! is: +! (C-c C-x C-l) +! $$ +! G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] +! $$ + + +! $$ +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! $$ + +! $$ +! G(p,q) = \left[ +! \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] - +! \left[ +! \sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +! \sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +! \Gamma_{qt}^{rs}) +! \right] +! $$ + +! Where p,q,r,s,t are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +! E. Scuseria + +! *Compute the gradient of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +! | two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | v_grad(n) | double precision | the gradient | +! | max_elem | double precision | maximum element of the gradient | + +! Internal: +! | grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +! | A((mo_num,mo_num) | doubre precision | gradient after the permutations | +! | norm | double precision | norm of the gradient | +! | p, q | integer | indexes of the element in the matrix grad | +! | i | integer | index for the tranformation in a vector | +! | r, s, t | integer | indexes dor the sums | +! | t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +! | t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | +! | dnrm2 | double precision | (Lapack) norm | + + +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) + +! Initialization + +!$OMP DO +do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +! Term 1 + +! Without optimization the term 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! grad(p,q) = grad(p,q) & +! + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) +! enddo +! enddo +! enddo + +! Since the matrix multiplication A.B is defined like : +! \begin{equation} +! c_{ij} = \sum_k a_{ik}.b_{kj} +! \end{equation} +! The previous equation can be rewritten as a matrix multplication + + +!**************** +! Opt first term +!**************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& +mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient, first term (s) :', t6 +!$OMP END MASTER + +! Term 2 + +! Without optimization the second term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num +! do t= 1, mo_num + +! grad(p,q) = grad(p,q) & +! + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & +! - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) +! enddo +! enddo +! enddo +! enddo +! enddo + +! Using the bielectronic integral properties : +! get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +! Using the two body matrix properties : +! two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +! t is one the right, we can put it on the external loop and create 3 +! indexes temporary array +! r,s can be seen as one index + +! By doing so, a matrix multiplication appears + + +!***************** +! Opt second term +!***************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient second term (s) : ', t6 +!$OMP END MASTER + +! Deallocation of private arrays + +deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + +!$OMP END PARALLEL + +call omp_set_max_active_levels(4) + +! Permutation, 2D matrix -> vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization/hessian_list_opt.irp.f new file mode 100644 index 00000000..31af769a --- /dev/null +++ b/src/mo_optimization/hessian_list_opt.irp.f @@ -0,0 +1,1129 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo +enddo +!$OMP END DO + + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + + + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization/hessian_opt.irp.f new file mode 100644 index 00000000..0b4312c6 --- /dev/null +++ b/src/mo_optimization/hessian_opt.irp.f @@ -0,0 +1,1043 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f new file mode 100644 index 00000000..7469ffd5 --- /dev/null +++ b/src/mo_optimization/my_providers.irp.f @@ -0,0 +1,141 @@ +! Dimensions of MOs + + +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER + +! Energies/criterions + +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER + +! With all the MOs + +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER + +! With the list of active MOs +! Can be generalized to any mo_class by changing the list/dimension + +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER diff --git a/src/mo_optimization/orb_opt.irp.f b/src/mo_optimization/orb_opt.irp.f new file mode 100644 index 00000000..71ff9262 --- /dev/null +++ b/src/mo_optimization/orb_opt.irp.f @@ -0,0 +1,22 @@ +! Orbital optimization program + +! This is an optimization program for molecular orbitals. It produces +! orbital rotations in order to lower the energy of a truncated wave +! function. +! This program just optimize the orbitals for a fixed number of +! determinants. This optimization process must be repeated for different +! number of determinants. + + + + +! Main program : orb_opt_trust + + +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end diff --git a/src/mo_optimization/org/TANGLE_org_mode.sh b/src/mo_optimization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_optimization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization/org/TODO.org new file mode 100644 index 00000000..960b9ba6 --- /dev/null +++ b/src/mo_optimization/org/TODO.org @@ -0,0 +1,17 @@ +TODO: +** TODO Keep under surveillance the performance of rotation matrix +- is the fix ok ? +** DONE Provider state_average_weight +** DONE Diagonal hessian for orbital optimization with a list of MOs +** DONE Something to force the step cancellation if R.R^T > treshold +** TODO Iterative method to compute the rotation matrix +- doesn't work actually +** DONE Test trust region with polynomial functions +** DONE Optimization/Localization program using the template +** DONE Correction OMP hessian shared/private arrays +** DONE State average energy +** DONE Correction of Rho +** TODO Check the PROVIDE/FREE/TOUCH +** TODO research of lambda without the power 2 +** DONE Clean the OMP sections + diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization/org/debug_gradient_list_opt.org new file mode 100644 index 00000000..3c6f98c0 --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_list_opt.org @@ -0,0 +1,79 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization/org/debug_gradient_opt.org new file mode 100644 index 00000000..101e1e8c --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_opt.org @@ -0,0 +1,77 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization/org/debug_hessian_list_opt.org new file mode 100644 index 00000000..76e8b337 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_list_opt.org @@ -0,0 +1,148 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | +| optimization_method | string | Method for the orbital optimization: | +| | | - 'full' -> full hessian | +| | | - 'diag' -> diagonal hessian | +| dim_list_act_orb | integer | number of active MOs | +| list_act(dim_list_act_orb) | integer | list of the actives MOs | +| | | | + +Internal: +| m | integer | number of MOs in the list | +| | | (active MOs) | +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization/org/debug_hessian_opt.org new file mode 100644 index 00000000..40f84c82 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_opt.org @@ -0,0 +1,172 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization/org/diagonal_hessian_list_opt.org new file mode 100644 index 00000000..a12ca981 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_list_opt.org @@ -0,0 +1,1561 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_s = 1,m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3, tmp_2rdm_3) +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_p = 1, m + ! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_s = 1, m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + + ! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + ! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do tmp_pq = 1, n + ! write(*,'(100(F10.5))') H(tmp_pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization/org/diagonal_hessian_opt.org new file mode 100644 index 00000000..efd75065 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_opt.org @@ -0,0 +1,1516 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do s = 1,mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,r,s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + deallocate(tmp_2rdm_3,tmp_bi_int_3) + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do p = 1, mo_num + ! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do s = 1, mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + + ! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do pq = 1, n + ! write(*,'(100(F10.5))') H(pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization/org/diagonalization_hessian.org new file mode 100644 index 00000000..5eed7dd5 --- /dev/null +++ b/src/mo_optimization/org/diagonalization_hessian.org @@ -0,0 +1,138 @@ +* Diagonalization of the hessian + +Just a matrix diagonalization using Lapack + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| H(n,n) | double precision | hessian | + +Output: +| e_val(n) | double precision | eigenvalues of the hessian | +| w(n,n) | double precision | eigenvectors of the hessian | + +Internal: +| nb_negative_nv | integer | number of negative eigenvalues | +| lwork | integer | for Lapack | +| work(lwork,n) | double precision | temporary array for Lapack | +| info | integer | if 0 -> ok, else problem in the diagonalization | +| i,j | integer | dummy indexes | + +#+BEGIN_SRC f90 :comments org :tangle diagonalization_hessian.irp.f +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org new file mode 100644 index 00000000..391c6033 --- /dev/null +++ b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org @@ -0,0 +1,376 @@ +* First diagonal hessian + +#+BEGIN_SRC f90 :comments :tangle first_diagonal_hessian_list_opt.irp.f +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization/org/gradient_opt.org new file mode 100644 index 00000000..45c761e9 --- /dev/null +++ b/src/mo_optimization/org/gradient_opt.org @@ -0,0 +1,358 @@ +* Gradient + +The gradient of the CI energy with respects to the orbital rotation +is: +(C-c C-x C-l) +$$ +G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] +$$ + + +$$ +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +$$ + +$$ +G(p,q) = \left[ +\sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] - +\left[ +\sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +\sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +\Gamma_{qt}^{rs}) +\right] +$$ + +Where p,q,r,s,t are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +E. Scuseria + +*Compute the gradient of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| v_grad(n) | double precision | the gradient | +| max_elem | double precision | maximum element of the gradient | + +Internal: +| grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +| A((mo_num,mo_num) | doubre precision | gradient after the permutations | +| norm | double precision | norm of the gradient | +| p, q | integer | indexes of the element in the matrix grad | +| i | integer | index for the tranformation in a vector | +| r, s, t | integer | indexes dor the sums | +| t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +| t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | +| dnrm2 | double precision | (Lapack) norm | + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) +#+END_SRC + +** Calculation +*** Initialization +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo + enddo + !$OMP END DO +#+END_SRC + +*** Term 1 + +Without optimization the term 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + grad(p,q) = grad(p,q) & + + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) + enddo + enddo +enddo + +Since the matrix multiplication A.B is defined like : +\begin{equation} +c_{ij} = \sum_k a_{ik}.b_{kj} +\end{equation} +The previous equation can be rewritten as a matrix multplication + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !**************** + ! Opt first term + !**************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& + mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient, first term (s) :', t6 + !$OMP END MASTER +#+END_SRC + +*** Term 2 + +Without optimization the second term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + do t= 1, mo_num + + grad(p,q) = grad(p,q) & + + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & + - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) + enddo + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +Using the two body matrix properties : +two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +t is one the right, we can put it on the external loop and create 3 +indexes temporary array +r,s can be seen as one index + +By doing so, a matrix multiplication appears + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !***************** + ! Opt second term + !***************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient second term (s) : ', t6 + !$OMP END MASTER +#+END_SRC + +*** Deallocation of private arrays +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) +#+END_SRC + +*** Permutation, 2D matrix -> vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(grad,A) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization/org/hessian_list_opt.org new file mode 100644 index 00000000..3df461cf --- /dev/null +++ b/src/mo_optimization/org/hessian_list_opt.org @@ -0,0 +1,1141 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo + enddo + !$OMP END DO + + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization/org/hessian_opt.org new file mode 100644 index 00000000..5b0642e3 --- /dev/null +++ b/src/mo_optimization/org/hessian_opt.org @@ -0,0 +1,1056 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization/org/my_providers.org new file mode 100644 index 00000000..b88cbd62 --- /dev/null +++ b/src/mo_optimization/org/my_providers.org @@ -0,0 +1,308 @@ +* Providers +** Dimensions of MOs + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER +#+END_SRC + +** Energies/criterions +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER +#+END_SRC + +** Gradient/hessian +*** Orbital optimization +**** With all the MOs +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER +#+END_SRC + +**** With the list of active MOs +Can be generalized to any mo_class by changing the list/dimension +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER +#+END_SRC + +*** Orbital localization +**** Gradient +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_core, (n_mo_dim_core) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_core ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the core MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_gradient_loc_core, my_CC_loc_core , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_act, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_act ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the active MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_loc_act, my_CC_loc_act , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org ! +:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_inact, (n_mo_dim_inact) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_inact ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the inactive MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_gradient_loc_inact, my_CC_loc_inact , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_virt, (n_mo_dim_virt) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_virt ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the virtual MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_gradient_loc_virt, my_CC_loc_virt , norm_grad) + +END_PROVIDER +#+END_SRC + +**** Hessian +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_core, (n_mo_dim_core) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the core MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_hessian_loc_core) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_act, (n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the active MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_loc_act) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_inact, (n_mo_dim_inact) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the inactive MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_hessian_loc_inact) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_virt, (n_mo_dim_virt) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the virtual MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_hessian_loc_virt) + +END_PROVIDER +#+END_SRC + diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization/org/optimization.org new file mode 100644 index 00000000..cbb7b752 --- /dev/null +++ b/src/mo_optimization/org/optimization.org @@ -0,0 +1,91 @@ +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end + +#+END_SRC diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization/org/orb_opt_trust_v2.org new file mode 100644 index 00000000..dbcd3c19 --- /dev/null +++ b/src/mo_optimization/org/orb_opt_trust_v2.org @@ -0,0 +1,349 @@ +* Orbital optimization program + +This is an optimization program for molecular orbitals. It produces +orbital rotations in order to lower the energy of a truncated wave +function. +This program just optimize the orbitals for a fixed number of +determinants. This optimization process must be repeated for different +number of determinants. + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +#+END_SRC + +* Main program : orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end +#+END_SRC + +* Subroutine : run_orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + +#+END_SRC + +Subroutine to optimize the MOs using a trust region algorithm: +- choice of the method +- initialization +- optimization until convergence + +The optimization use the trust region algorithm, the different parts +are explained in the corresponding subroutine files. + +qp_edit: +| thresh_opt_max_elem_grad | +| optimization_max_nb_iter | +| optimization_method | + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| N_states | integer | number of states | +| ci_energy(N_states) | double precision | CI energies | +| state_average_weight(N_states) | double precision | Weight of the different states | + +Variables: +| m | integer | number of active MOs | +| tmp_n | integer | m*(m-1)/2, number of MO parameters | +| tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(tmp_n) | double precision | gradient | +| H(tmp_n,tmp_n) | double precision | hessian (2D) | +| h_f(m,m,m,m) | double precision | hessian (4D) | +| e_val(m) | double precision | eigenvalues of the hessian | +| w(m,m) | double precision | eigenvectors of the hessian | +| x(m) | double precision | step given by the trust region | +| m_x(m,m) | double precision | step given by the trust region after | +| tmp_R(m,m) | double precision | rotation matrix for active MOs | +| R(mo_num,mo_num) | double precision | full rotation matrix | +| prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +| new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +| delta | double precision | radius of the trust region | +| rho | double precision | agreement between the model and the exact function | +| max_elem | double precision | maximum element in the gradient | +| i | integer | index | +| tmp_i,tmp_j | integer | indexes in the subspace containing only | +| | | the active MOs | +| converged | logical | convergence of the algorithm | +| cancel_step | logical | if the step must be cancelled | +| nb_iter | integer | number of iterations (accepted) | +| nb_diag | integer | number of diagonalizations of the CI matrix | +| nb_cancel | integer | number of cancelled steps for the actual iteration | +| nb_cancel_tot | integer | total number of cancel steps | +| info | integer | if 0 ok, else problem in the diagonalization of | +| | | the hessian with the Lapack routine | +| criterion | double precision | energy at a given step | +| prev_criterion | double precision | energy before the rotation | +| criterion_model | double precision | estimated energy after the rotation using | +| | | a Taylor series | +| must_exit | logical | To exit the trust region algorithm when | +| | | criterion - criterion_model is too small | +| enforce_step_cancellation | logical | To force the cancellation of the step if the | +| | | error in the rotation matrix is too large | + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef +#+END_SRC + +** Allocation + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + allocate(R(mo_num,mo_num)) ! rotation matrix + allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + + ! Definition of m and tmp_n + m = dim_list_act_orb + tmp_n = m*(m-1)/2 + + allocate(tmp_list(m)) + allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) + allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +#+END_SRC + +** Method + There are three different methods : + - the "full" hessian, which uses all the elements of the hessian + matrix" + - the "diagonal" hessian, which uses only the diagonal elements of the + hessian + - without the hessian (hessian = identity matrix) + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + !Display the method + print*, 'Method :', optimization_method + if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n + elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + else + print*,'Unknown optimization_method, please select full, diag or none' + call abort + endif + print*, 'Absolute value of the hessian:', absolute_eig +#+END_SRC + +** Calculations +*** Algorithm + +Here is the main algorithm of the optimization: +- First of all we initialize some parameters and we compute the + criterion (the ci energy) before doing any MO rotations +- We compute the gradient and the hessian for the active MOs +- We diagonalize the hessian +- We compute a step and loop to reduce the radius of the + trust region (and the size of the step by the way) until the step is + accepted +- We repeat the process until the convergence + NB: the convergence criterion can be changed + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + ! Loop until the convergence of the optimization + ! call diagonalize_ci + + !### Initialization ### + nb_iter = 0 + rho = 0.5d0 + not_converged = .True. + tmp_list = list_act ! Optimization of the active MOs + nb_cancel_tot = 0 + + ! Renormalization of the weights of the states + call state_weight_normalization + + ! Compute the criterion before the loop + call state_average_energy(prev_criterion) + + do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif + enddo + +#+END_SRC + +** Deallocation, end + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end +#+END_SRC + diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization/org/state_average_energy.org new file mode 100644 index 00000000..b779845a --- /dev/null +++ b/src/mo_optimization/org/state_average_energy.org @@ -0,0 +1,73 @@ +* State average energy + +Calculation of the state average energy from the integrals and the +density matrices. + +\begin{align*} +E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +\end{align*} +$h_{ij}$: mono-electronic integral +$\gamma_{ij}$: one electron density matrix +$v_{ij}^{kl}$: bi-electronic integral +$\Gamma_{ij}^{kl}$: two electrons density matrix + +TODO: OMP version + +PROVIDED: +| mo_one_e_integrals | double precision | mono-electronic integrals | +| get_two_e_integral | double precision | bi-electronic integrals | +| one_e_dm_mo | double precision | one electron density matrix | +| two_e_dm_mo | double precision | two electrons density matrix | +| nuclear_repulsion | double precision | nuclear repulsion | +| mo_num | integer | number of MOs | + +Output: +| energy | double precision | state average energy | + +Internal: +| mono_e | double precision | mono-electronic energy | +| bi_e | double precision | bi-electronic energy | +| i,j,k,l | integer | indexes to loop over the MOs | + +#+BEGIN_SRC f90 :comments org :tangle state_average_energy.irp.f +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end +#+END_SRC diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization/org/state_weight_normalization.org new file mode 100644 index 00000000..492ad3d4 --- /dev/null +++ b/src/mo_optimization/org/state_weight_normalization.org @@ -0,0 +1,31 @@ +#+BEGIN_SRC f90 :comments org :tangle state_weight_normalization.irp.f +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end +#+END_SRC diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization/org/update_parameters.org new file mode 100644 index 00000000..cd9b9595 --- /dev/null +++ b/src/mo_optimization/org/update_parameters.org @@ -0,0 +1,16 @@ +Subroutine toupdate the parameters. +Ex: TOUCH mo_coef ... + +#+BEGIN_SRC f90 :comments org :tangle update_parameters.irp.f +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end +#+END_SRC diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization/org/update_st_av_ci_energy.org new file mode 100644 index 00000000..2dc7f3ee --- /dev/null +++ b/src/mo_optimization/org/update_st_av_ci_energy.org @@ -0,0 +1,26 @@ +* Update the CI state average energy + +Computes the state average energy +\begin{align*} +E =\sum_{i=1}^{N_{states}} E_i . w_i +\end{align*} + +$E_i$: energy of state i +$w_i$: weight of state i + +#+BEGIN_SRC f90 :comments org :tangle update_st_av_ci_energy.irp.f +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end +#+END_SRC diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization/run_orb_opt_trust_v2.irp.f new file mode 100644 index 00000000..e1431255 --- /dev/null +++ b/src/mo_optimization/run_orb_opt_trust_v2.irp.f @@ -0,0 +1,317 @@ +! Subroutine : run_orb_opt_trust + + + + + + +! Subroutine to optimize the MOs using a trust region algorithm: +! - choice of the method +! - initialization +! - optimization until convergence + +! The optimization use the trust region algorithm, the different parts +! are explained in the corresponding subroutine files. + +! qp_edit: +! | thresh_opt_max_elem_grad | +! | optimization_max_nb_iter | +! | optimization_method | + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | N_states | integer | number of states | +! | ci_energy(N_states) | double precision | CI energies | +! | state_average_weight(N_states) | double precision | Weight of the different states | + +! Variables: +! | m | integer | number of active MOs | +! | tmp_n | integer | m*(m-1)/2, number of MO parameters | +! | tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(tmp_n) | double precision | gradient | +! | H(tmp_n,tmp_n) | double precision | hessian (2D) | +! | h_f(m,m,m,m) | double precision | hessian (4D) | +! | e_val(m) | double precision | eigenvalues of the hessian | +! | w(m,m) | double precision | eigenvectors of the hessian | +! | x(m) | double precision | step given by the trust region | +! | m_x(m,m) | double precision | step given by the trust region after | +! | tmp_R(m,m) | double precision | rotation matrix for active MOs | +! | R(mo_num,mo_num) | double precision | full rotation matrix | +! | prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +! | new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +! | delta | double precision | radius of the trust region | +! | rho | double precision | agreement between the model and the exact function | +! | max_elem | double precision | maximum element in the gradient | +! | i | integer | index | +! | tmp_i,tmp_j | integer | indexes in the subspace containing only | +! | | | the active MOs | +! | converged | logical | convergence of the algorithm | +! | cancel_step | logical | if the step must be cancelled | +! | nb_iter | integer | number of iterations (accepted) | +! | nb_diag | integer | number of diagonalizations of the CI matrix | +! | nb_cancel | integer | number of cancelled steps for the actual iteration | +! | nb_cancel_tot | integer | total number of cancel steps | +! | info | integer | if 0 ok, else problem in the diagonalization of | +! | | | the hessian with the Lapack routine | +! | criterion | double precision | energy at a given step | +! | prev_criterion | double precision | energy before the rotation | +! | criterion_model | double precision | estimated energy after the rotation using | +! | | | a Taylor series | +! | must_exit | logical | To exit the trust region algorithm when | +! | | | criterion - criterion_model is too small | +! | enforce_step_cancellation | logical | To force the cancellation of the step if the | +! | | | error in the rotation matrix is too large | + + +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef + +! Allocation + + +allocate(R(mo_num,mo_num)) ! rotation matrix +allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + +! Definition of m and tmp_n +m = dim_list_act_orb +tmp_n = m*(m-1)/2 + +allocate(tmp_list(m)) +allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) +allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +! Method +! There are three different methods : +! - the "full" hessian, which uses all the elements of the hessian +! matrix" +! - the "diagonal" hessian, which uses only the diagonal elements of the +! hessian +! - without the hessian (hessian = identity matrix) + + +!Display the method + print*, 'Method :', optimization_method +if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n +elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +else + print*,'Unknown optimization_method, please select full, diag or none' + call abort +endif +print*, 'Absolute value of the hessian:', absolute_eig + +! Algorithm + +! Here is the main algorithm of the optimization: +! - First of all we initialize some parameters and we compute the +! criterion (the ci energy) before doing any MO rotations +! - We compute the gradient and the hessian for the active MOs +! - We diagonalize the hessian +! - We compute a step and loop to reduce the radius of the +! trust region (and the size of the step by the way) until the step is +! accepted +! - We repeat the process until the convergence +! NB: the convergence criterion can be changed + + +! Loop until the convergence of the optimization +! call diagonalize_ci + +!### Initialization ### +nb_iter = 0 +rho = 0.5d0 +not_converged = .True. +tmp_list = list_act ! Optimization of the active MOs +nb_cancel_tot = 0 + +! Renormalization of the weights of the states +call state_weight_normalization + +! Compute the criterion before the loop +call state_average_energy(prev_criterion) + +do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif +enddo + +! Deallocation, end + + +deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization/save_energy.irp.f new file mode 100644 index 00000000..5dac8da9 --- /dev/null +++ b/src/mo_optimization/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_fci_energy(E(1:N_states)) + call ezfio_set_fci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f new file mode 100644 index 00000000..2cd063da --- /dev/null +++ b/src/mo_optimization/state_average_energy.irp.f @@ -0,0 +1,72 @@ +! State average energy + +! Calculation of the state average energy from the integrals and the +! density matrices. + +! \begin{align*} +! E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +! \end{align*} +! $h_{ij}$: mono-electronic integral +! $\gamma_{ij}$: one electron density matrix +! $v_{ij}^{kl}$: bi-electronic integral +! $\Gamma_{ij}^{kl}$: two electrons density matrix + +! TODO: OMP version + +! PROVIDED: +! | mo_one_e_integrals | double precision | mono-electronic integrals | +! | get_two_e_integral | double precision | bi-electronic integrals | +! | one_e_dm_mo | double precision | one electron density matrix | +! | two_e_dm_mo | double precision | two electrons density matrix | +! | nuclear_repulsion | double precision | nuclear repulsion | +! | mo_num | integer | number of MOs | + +! Output: +! | energy | double precision | state average energy | + +! Internal: +! | mono_e | double precision | mono-electronic energy | +! | bi_e | double precision | bi-electronic energy | +! | i,j,k,l | integer | indexes to loop over the MOs | + + +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization/state_weight_normalization.irp.f new file mode 100644 index 00000000..27d30af7 --- /dev/null +++ b/src/mo_optimization/state_weight_normalization.irp.f @@ -0,0 +1,29 @@ +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization/update_parameters.irp.f new file mode 100644 index 00000000..88e8fc34 --- /dev/null +++ b/src/mo_optimization/update_parameters.irp.f @@ -0,0 +1,15 @@ +! Subroutine toupdate the parameters. +! Ex: TOUCH mo_coef ... + + +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization/update_st_av_ci_energy.irp.f new file mode 100644 index 00000000..18b72502 --- /dev/null +++ b/src/mo_optimization/update_st_av_ci_energy.irp.f @@ -0,0 +1,25 @@ +! Update the CI state average energy + +! Computes the state average energy +! \begin{align*} +! E =\sum_{i=1}^{N_{states}} E_i . w_i +! \end{align*} + +! $E_i$: energy of state i +! $w_i$: weight of state i + + +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end From 6b4bf5b601fb759772be99af301b4959d17c18e6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 18 Apr 2023 14:41:08 +0200 Subject: [PATCH 044/337] Raise error --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- scripts/compilation/qp_create_ninja | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..9e5b27ce 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit 9e5b27ce5a174901765cec9db9e7b2aa6170a5de diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index aad85778..27b34901 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -25,7 +25,7 @@ except ImportError: "quantum_package.rc")) print("\n".join(["", "Error:", "source %s" % f, ""])) - sys.exit(1) + raise # Compress path def comp_path(path): From f228b0a3a477fa10d7b8194f43ae1615d1200bbc Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:43:49 +0200 Subject: [PATCH 045/337] missing program --- src/mo_optimization/optimization.irp.f | 86 ++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/mo_optimization/optimization.irp.f diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/optimization.irp.f new file mode 100644 index 00000000..9892b3e3 --- /dev/null +++ b/src/mo_optimization/optimization.irp.f @@ -0,0 +1,86 @@ +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end + +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end From 20d057b7d1c046ef6872d6713e57511e4adc311e Mon Sep 17 00:00:00 2001 From: ydamour Date: Fri, 21 Apr 2023 13:44:51 +0200 Subject: [PATCH 046/337] unecessary parameter --- src/mo_optimization/EZFIO.cfg | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index 8944e507..e6aa2d67 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -4,12 +4,6 @@ doc: Define the kind of hessian for the orbital optimization full : full hessian interface: ezfio,provider,ocaml default: full -[n_det_start] -type: integer -doc: Number of determinants after which the orbital optimization will start, n_det_start must be greater than 1. The algorithm does a cipsi until n_det > n_det_start and the optimization starts after -interface: ezfio,provider,ocaml -default: 5 - [n_det_max_opt] type: integer doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) From 94662d3da070a8f4602578a62b3734fcce709593 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 15:11:50 +0200 Subject: [PATCH 047/337] Introduced JSON --- src/hartree_fock/NEED | 1 + src/hartree_fock/scf.irp.f | 6 +++++ src/json/EZFIO.cfg | 5 ++++ src/json/NEED | 1 + src/json/README.rst | 5 ++++ src/json/json.irp.f | 39 +++++++++++++++++++++++++++ src/json/json_formats.irp.f | 26 ++++++++++++++++++ src/scf_utils/roothaan_hall_scf.irp.f | 39 ++++++++++++++++++++++++--- src/two_body_rdm/io_two_rdm.irp.f | 8 +++--- 9 files changed, 122 insertions(+), 8 deletions(-) create mode 100644 src/json/EZFIO.cfg create mode 100644 src/json/NEED create mode 100644 src/json/README.rst create mode 100644 src/json/json.irp.f create mode 100644 src/json/json_formats.irp.f diff --git a/src/hartree_fock/NEED b/src/hartree_fock/NEED index 2b3fa238..e168bd80 100644 --- a/src/hartree_fock/NEED +++ b/src/hartree_fock/NEED @@ -1,3 +1,4 @@ ao_one_e_ints ao_two_e_ints scf_utils +json diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index a7ac9fe4..d22b11ab 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -80,9 +80,15 @@ subroutine run mo_label = 'Orthonormalized' + write(json_unit,*) '"scf" : [' + call Roothaan_Hall_SCF call ezfio_set_hartree_fock_energy(SCF_energy) + write(json_unit,*) ']' + + call json_close + end diff --git a/src/json/EZFIO.cfg b/src/json/EZFIO.cfg new file mode 100644 index 00000000..7dc8d796 --- /dev/null +++ b/src/json/EZFIO.cfg @@ -0,0 +1,5 @@ +[empty] +type: logical +doc: Needed to create the json directory +interface: ezfio + diff --git a/src/json/NEED b/src/json/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/json/NEED @@ -0,0 +1 @@ +ezfio_files diff --git a/src/json/README.rst b/src/json/README.rst new file mode 100644 index 00000000..3dd9ffbb --- /dev/null +++ b/src/json/README.rst @@ -0,0 +1,5 @@ +==== +json +==== + +JSON files to simplify getting output information from QP. diff --git a/src/json/json.irp.f b/src/json/json.irp.f new file mode 100644 index 00000000..5a92f22f --- /dev/null +++ b/src/json/json.irp.f @@ -0,0 +1,39 @@ +BEGIN_PROVIDER [ character*(128), json_filename ] + implicit none + BEGIN_DOC + ! Fortran unit of the JSON file + END_DOC + integer, external :: getUnitAndOpen + integer :: counter + character*(128) :: prefix + logical :: exists + + prefix = trim(ezfio_filename)//'/json/' + + exists = .True. + counter = 0 + do while (exists) + counter += 1 + write(json_filename, '(A,I5.5,A)') trim(prefix), counter, '.json' + INQUIRE(FILE=trim(json_filename), EXIST=exists) + enddo + +END_PROVIDER + +BEGIN_PROVIDER [ integer, json_unit] + implicit none + BEGIN_DOC + ! Unit file for JSON output + END_DOC + integer, external :: getUnitAndOpen + call ezfio_set_json_empty(.False.) + json_unit = getUnitAndOpen(json_filename, 'w') + write(json_unit, '(A)') '{' +END_PROVIDER + +subroutine json_close + write(json_unit, '(A)') '}' + close(json_unit) + FREE json_unit +end + diff --git a/src/json/json_formats.irp.f b/src/json/json_formats.irp.f new file mode 100644 index 00000000..14a8f014 --- /dev/null +++ b/src/json/json_formats.irp.f @@ -0,0 +1,26 @@ + BEGIN_PROVIDER [ character*(64), json_int_fmt ] +&BEGIN_PROVIDER [ character*(64), json_int_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_real_fmt ] +&BEGIN_PROVIDER [ character*(64), json_real_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_str_fmt ] +&BEGIN_PROVIDER [ character*(64), json_str_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_true_fmt ] +&BEGIN_PROVIDER [ character*(64), json_true_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_false_fmt ] +&BEGIN_PROVIDER [ character*(64), json_false_fmtx ] + implicit none + BEGIN_DOC + ! Formats for JSON output. + ! x: used to mark the last write (no comma) + END_DOC + json_int_fmt = '('' "'',A,''": '',I10,'','')' + json_int_fmtx = '('' "'',A,''": '',I10)' + json_real_fmt = '('' "'',A,''": '',E22.15,'','')' + json_real_fmtx = '('' "'',A,''": '',E22.15)' + json_str_fmt = '('' "'',A,''": "'',A,''",'')' + json_str_fmtx = '('' "'',A,''": "'',A,''"'')' + json_true_fmt = '('' "'',A,''": true,'')' + json_true_fmtx = '('' "'',A,''": true'')' + json_false_fmt = '('' "'',A,''": false,'')' + json_false_fmtx = '('' "'',A,''": false'')' +END_PROVIDER diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 3b9eaeb4..449afdc8 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -12,6 +12,7 @@ END_DOC integer :: iteration_SCF,dim_DIIS,index_dim_DIIS + logical :: converged integer :: i,j logical, external :: qp_stop double precision, allocatable :: mo_coef_save(:,:) @@ -50,10 +51,8 @@ END_DOC ! PROVIDE FPS_SPF_matrix_AO Fock_matrix_AO - do while ( & - ( (max_error_DIIS > threshold_DIIS_nonzero) .or. & - (dabs(Delta_energy_SCF) > thresh_SCF) & - ) .and. (iteration_SCF < n_it_SCF_max) ) + converged = .False. + do while ( .not.converged .and. (iteration_SCF < n_it_SCF_max) ) ! Increment cycle number @@ -144,17 +143,45 @@ END_DOC SOFT_TOUCH level_shift energy_SCF_previous = energy_SCF + converged = ( (max_error_DIIS <= threshold_DIIS_nonzero) .and. & + (dabs(Delta_energy_SCF) <= thresh_SCF) ) + ! Print results at the end of each iteration write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)') & iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS +! Write data in JSON file + + if (iteration_SCF == 1) then + write(json_unit, *) '{' + else + write(json_unit, *) '}, {' + endif + write(json_unit, json_int_fmt) 'iteration', iteration_SCF + write(json_unit, json_real_fmt) 'energy', energy_SCF + write(json_unit, json_real_fmt) 'delta_energy_SCF', Delta_energy_SCF + write(json_unit, json_real_fmt) 'max_error_DIIS', max_error_DIIS + write(json_unit, json_real_fmt) 'level_shift', level_shift + write(json_unit, json_int_fmt) 'dim_DIIS', dim_DIIS + if (Delta_energy_SCF < 0.d0) then call save_mos + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + if (qp_stop()) exit enddo + write(json_unit, *) '}' if (iteration_SCF < n_it_SCF_max) then mo_label = 'Canonical' @@ -166,6 +193,10 @@ END_DOC write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)') & '====','================','================','================','================' write(6,*) + if (converged) then + write(6,*) 'SCF converged' + endif + if(.not.frozen_orb_scf)then call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), & diff --git a/src/two_body_rdm/io_two_rdm.irp.f b/src/two_body_rdm/io_two_rdm.irp.f index f7008ca9..67837682 100644 --- a/src/two_body_rdm/io_two_rdm.irp.f +++ b/src/two_body_rdm/io_two_rdm.irp.f @@ -1,12 +1,12 @@ subroutine write_array_two_rdm(n_orb,nstates,array_tmp,name_file) implicit none integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,nstates) character*(128) :: output integer :: i_unit_output,getUnitAndOpen - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'W') write(i_unit_output)array_tmp @@ -18,9 +18,9 @@ subroutine read_array_two_rdm(n_orb,nstates,array_tmp,name_file) character*(128) :: output integer :: i_unit_output,getUnitAndOpen integer, intent(in) :: n_orb,nstates - character*(128), intent(in) :: name_file + character*(128), intent(in) :: name_file double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,N_states) - PROVIDE ezfio_filename + PROVIDE ezfio_filename output=trim(ezfio_filename)//'/work/'//trim(name_file) i_unit_output = getUnitAndOpen(output,'R') read(i_unit_output)array_tmp From 5039bb674d6d2433de182011bbb8973748588c25 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 18:06:37 +0200 Subject: [PATCH 048/337] Fixed need for JSON --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- src/json/json.irp.f | 6 ++++++ src/kohn_sham/ks_scf.irp.f | 4 ++++ src/kohn_sham_rs/rs_ks_scf.irp.f | 3 +++ src/scf_utils/NEED | 1 + src/scf_utils/roothaan_hall_scf.irp.f | 4 ++++ 8 files changed, 21 insertions(+), 3 deletions(-) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index fd43778e..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit fd43778e12bb5858c4c780c34346be0f158b8cc7 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/json/json.irp.f b/src/json/json.irp.f index 5a92f22f..1fc24eb6 100644 --- a/src/json/json.irp.f +++ b/src/json/json.irp.f @@ -10,6 +10,7 @@ BEGIN_PROVIDER [ character*(128), json_filename ] prefix = trim(ezfio_filename)//'/json/' + call lock_io exists = .True. counter = 0 do while (exists) @@ -17,6 +18,7 @@ BEGIN_PROVIDER [ character*(128), json_filename ] write(json_filename, '(A,I5.5,A)') trim(prefix), counter, '.json' INQUIRE(FILE=trim(json_filename), EXIST=exists) enddo + call unlock_io END_PROVIDER @@ -27,13 +29,17 @@ BEGIN_PROVIDER [ integer, json_unit] END_DOC integer, external :: getUnitAndOpen call ezfio_set_json_empty(.False.) + call lock_io json_unit = getUnitAndOpen(json_filename, 'w') write(json_unit, '(A)') '{' + call unlock_io END_PROVIDER subroutine json_close + call lock_io write(json_unit, '(A)') '}' close(json_unit) + call unlock_io FREE json_unit end diff --git a/src/kohn_sham/ks_scf.irp.f b/src/kohn_sham/ks_scf.irp.f index aa6efd52..85bfc333 100644 --- a/src/kohn_sham/ks_scf.irp.f +++ b/src/kohn_sham/ks_scf.irp.f @@ -90,7 +90,11 @@ subroutine run ! Choose SCF algorithm + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + + call json_close end diff --git a/src/kohn_sham_rs/rs_ks_scf.irp.f b/src/kohn_sham_rs/rs_ks_scf.irp.f index 84b85136..f28fd861 100644 --- a/src/kohn_sham_rs/rs_ks_scf.irp.f +++ b/src/kohn_sham_rs/rs_ks_scf.irp.f @@ -93,7 +93,10 @@ subroutine run level_shift += 1.d0 touch level_shift + write(json_unit,*) '"scf" : [' call Roothaan_Hall_SCF + write(json_unit,*) ']' + call json_close call ezfio_set_kohn_sham_rs_energy(SCF_energy) write(*, '(A22,X,F16.10)') 'one_e_energy = ',one_e_energy diff --git a/src/scf_utils/NEED b/src/scf_utils/NEED index b89695da..292d343a 100644 --- a/src/scf_utils/NEED +++ b/src/scf_utils/NEED @@ -1,2 +1,3 @@ mo_guess bitmask +json diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 449afdc8..08fe7acf 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -153,6 +153,7 @@ END_DOC ! Write data in JSON file + call lock_io if (iteration_SCF == 1) then write(json_unit, *) '{' else @@ -164,6 +165,7 @@ END_DOC write(json_unit, json_real_fmt) 'max_error_DIIS', max_error_DIIS write(json_unit, json_real_fmt) 'level_shift', level_shift write(json_unit, json_int_fmt) 'dim_DIIS', dim_DIIS + call unlock_io if (Delta_energy_SCF < 0.d0) then call save_mos @@ -172,11 +174,13 @@ END_DOC write(json_unit, json_false_fmt) 'saved' endif + call lock_io if (converged) then write(json_unit, json_true_fmtx) 'converged' else write(json_unit, json_false_fmtx) 'converged' endif + call unlock_io if (qp_stop()) exit From 528bf20e1e00e2677f695bba8b61c203cc053777 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 21 Apr 2023 18:10:08 +0200 Subject: [PATCH 049/337] Cleaning useless function --- src/kohn_sham/print_mos.irp.f | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f index 5e728444..7105c989 100644 --- a/src/kohn_sham/print_mos.irp.f +++ b/src/kohn_sham/print_mos.irp.f @@ -21,10 +21,3 @@ program print_mos end -double precision function f_mu(x) - implicit none - double precision, intent(in) :: x - - - -end From 333d8a551c277e3808f9ba82e8c1ffa6edbffd4c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 21 Apr 2023 20:50:00 +0200 Subject: [PATCH 050/337] v0 tc integ numericall with DGEMM --- src/bi_ort_ints/total_twoe_pot.irp.f | 28 +-- src/non_h_ints_mu/debug_fit.irp.f | 14 +- src/non_h_ints_mu/grad_squared.irp.f | 26 +-- src/non_h_ints_mu/grad_tc_int.irp.f | 6 +- src/non_h_ints_mu/j12_nucl_utils.irp.f | 149 ++++----------- src/non_h_ints_mu/jast_deriv.irp.f | 234 +++++++++++++++++++++++ src/non_h_ints_mu/new_grad_tc.irp.f | 98 +--------- src/non_h_ints_mu/new_grad_tc_manu.irp.f | 1 - src/non_h_ints_mu/numerical_integ.irp.f | 56 +++--- src/non_h_ints_mu/tc_integ.irp.f | 217 +++++++++++++++++++++ 10 files changed, 550 insertions(+), 279 deletions(-) create mode 100644 src/non_h_ints_mu/jast_deriv.irp.f create mode 100644 src/non_h_ints_mu/tc_integ.irp.f diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f5f5959a..bdebe890 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -40,20 +40,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n provide j1b_type - if(j1b_type .eq. 3) then - - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j) - !write(222,*) ao_two_e_tc_tot(k,i,l,j) - enddo - enddo - enddo - enddo - - else + if(j1b_type .eq. 0) then PROVIDE ao_tc_sym_two_e_pot_in_map @@ -77,6 +64,19 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n enddo enddo + else + + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j) + !write(222,*) ao_two_e_tc_tot(k,i,l,j) + enddo + enddo + enddo + enddo + endif END_PROVIDER diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index af441335..5995bffa 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -82,9 +82,9 @@ subroutine test_grad_j1b_nucl() integer :: ipoint double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz double precision :: r(3) - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num print*, ' test_grad_j1b_nucl ...' @@ -101,7 +101,7 @@ subroutine test_grad_j1b_nucl() r(3) = final_grid_points(3,ipoint) i_exc = v_1b_grad(1,ipoint) - i_num = grad_x_j1b_nucl(r) + i_num = grad_x_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in x of v_1b_grad on', ipoint @@ -111,7 +111,7 @@ subroutine test_grad_j1b_nucl() endif i_exc = v_1b_grad(2,ipoint) - i_num = grad_y_j1b_nucl(r) + i_num = grad_y_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in y of v_1b_grad on', ipoint @@ -121,7 +121,7 @@ subroutine test_grad_j1b_nucl() endif i_exc = v_1b_grad(3,ipoint) - i_num = grad_z_j1b_nucl(r) + i_num = grad_z_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in z of v_1b_grad on', ipoint @@ -317,7 +317,7 @@ subroutine test_fit_ugradu() i_fit = i_fit / dsqrt(x2) tmp = j12_mu(r1, r2) - call grad1_j12_mu_exc(r1, r2, grad) + call grad1_j12_mu(r1, r2, grad) ! --- diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 1fd39f6a..558ca268 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -351,6 +351,7 @@ END_PROVIDER ! --- + BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -376,7 +377,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao else - allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + allocate(b_mat(n_points_final_grid,ao_num,ao_num)) b_mat = 0.d0 !$OMP PARALLEL & @@ -392,29 +393,13 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo enddo !$OMP END DO - !$OMP END PARALLEL - - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & - !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do l = 1, ao_num - tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint) - enddo - enddo - enddo - !$OMP END DO !$OMP END PARALLEL tc_grad_square_ao = 0.d0 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) - deallocate(tmp, b_mat) + deallocate(b_mat) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) @@ -450,3 +435,4 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao END_PROVIDER ! --- + diff --git a/src/non_h_ints_mu/grad_tc_int.irp.f b/src/non_h_ints_mu/grad_tc_int.irp.f index cb3b71a3..f4eb02e2 100644 --- a/src/non_h_ints_mu/grad_tc_int.irp.f +++ b/src/non_h_ints_mu/grad_tc_int.irp.f @@ -16,9 +16,11 @@ BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, a double precision :: wall1, wall0 double precision, allocatable :: b_mat(:,:,:,:), ac_mat(:,:,:,:) + print*, ' providing ao_non_hermit_term_chemist ...' + call wall_time(wall0) + provide v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu - call wall_time(wall0) allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) !$OMP PARALLEL & @@ -102,7 +104,7 @@ BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, a !$OMP END PARALLEL call wall_time(wall1) - print *, ' wall time dgemm ', wall1 - wall0 + print *, ' wall time for ao_non_hermit_term_chemist ', wall1 - wall0 END_PROVIDER diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index a515e0b8..c9f62b18 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -204,39 +204,6 @@ END_PROVIDER ! --- -double precision function jmu_modif(r1, r2) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, external :: j12_mu, j12_nucl - - jmu_modif = j12_mu(r1, r2) * j12_nucl(r1, r2) - - return -end function jmu_modif - -! --- - -double precision function j12_mu(r1, r2) - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision :: mu_r12, r12 - - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_r12 = mu_erf * r12 - - j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf - - return -end function j12_mu - -! --- - double precision function j12_mu_r12(r12) include 'constants.include.F' @@ -254,6 +221,19 @@ end function j12_mu_r12 ! --- +double precision function jmu_modif(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu, j12_nucl + + jmu_modif = j12_mu(r1, r2) * j12_nucl(r1, r2) + + return +end function jmu_modif + +! --- + double precision function j12_mu_gauss(r1, r2) implicit none @@ -278,30 +258,6 @@ end function j12_mu_gauss ! --- -double precision function j1b_nucl(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e - - j1b_nucl = 1.d0 - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - exp(-a*d) - - j1b_nucl = j1b_nucl * e - enddo - - return -end function j1b_nucl - -! --- - double precision function j12_nucl(r1, r2) implicit none @@ -317,7 +273,7 @@ end function j12_nucl ! --------------------------------------------------------------------------------------- -double precision function grad_x_j1b_nucl(r) +double precision function grad_x_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -333,12 +289,12 @@ double precision function grad_x_j1b_nucl(r) r_eps(1) = r_eps(1) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_x_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_x_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_x_j1b_nucl +end function grad_x_j1b_nucl_num -double precision function grad_y_j1b_nucl(r) +double precision function grad_y_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -354,12 +310,12 @@ double precision function grad_y_j1b_nucl(r) r_eps(2) = r_eps(2) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_y_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_y_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_y_j1b_nucl +end function grad_y_j1b_nucl_num -double precision function grad_z_j1b_nucl(r) +double precision function grad_z_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -375,10 +331,10 @@ double precision function grad_z_j1b_nucl(r) r_eps(3) = r_eps(3) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_z_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_z_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_z_j1b_nucl +end function grad_z_j1b_nucl_num ! --------------------------------------------------------------------------------------- @@ -389,9 +345,9 @@ double precision function lapl_j1b_nucl(r) implicit none double precision, intent(in) :: r(3) double precision :: r_eps(3), eps, fp, fm, delta - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num eps = 1d-5 r_eps = r @@ -402,9 +358,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(1))) r_eps(1) = r_eps(1) + delta - fp = grad_x_j1b_nucl(r_eps) + fp = grad_x_j1b_nucl_num(r_eps) r_eps(1) = r_eps(1) - 2.d0 * delta - fm = grad_x_j1b_nucl(r_eps) + fm = grad_x_j1b_nucl_num(r_eps) r_eps(1) = r_eps(1) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -413,9 +369,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(2))) r_eps(2) = r_eps(2) + delta - fp = grad_y_j1b_nucl(r_eps) + fp = grad_y_j1b_nucl_num(r_eps) r_eps(2) = r_eps(2) - 2.d0 * delta - fm = grad_y_j1b_nucl(r_eps) + fm = grad_y_j1b_nucl_num(r_eps) r_eps(2) = r_eps(2) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -424,9 +380,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(3))) r_eps(3) = r_eps(3) + delta - fp = grad_z_j1b_nucl(r_eps) + fp = grad_z_j1b_nucl_num(r_eps) r_eps(3) = r_eps(3) - 2.d0 * delta - fm = grad_z_j1b_nucl(r_eps) + fm = grad_z_j1b_nucl_num(r_eps) r_eps(3) = r_eps(3) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -574,35 +530,6 @@ end function grad1_z_j12_mu_num ! --------------------------------------------------------------------------------------- -! --- - -subroutine grad1_j12_mu_exc(r1, r2, grad) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: dx, dy, dz, r12, tmp - - grad = 0.d0 - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - grad(1) = tmp * dx - grad(2) = tmp * dy - grad(3) = tmp * dz - - return -end subroutine grad1_j12_mu_exc - -! --- - subroutine grad1_jmu_modif_num(r1, r2, grad) implicit none @@ -614,11 +541,11 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) double precision, external :: j12_mu double precision, external :: j1b_nucl - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp0 = j1b_nucl(r1) tmp1 = j1b_nucl(r2) @@ -626,9 +553,9 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) tmp3 = tmp0 * tmp1 tmp4 = tmp2 * tmp1 - grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl(r1) - grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl(r1) - grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl(r1) + grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl_num(r1) + grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl_num(r1) + grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl_num(r1) return end subroutine grad1_jmu_modif_num diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f new file mode 100644 index 00000000..e4a3b071 --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -0,0 +1,234 @@ + +! --- + + BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] +&BEGIN_PROVIDER [ double precision, grad1_u12_squared_num, (n_points_extra_final_grid, n_points_final_grid)] + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + grad1_u12_num = 0.d0 + grad1_u12_squared_num = 0.d0 + + if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + + double precision :: v1b_r1, v1b_r2, u2b_r12 + double precision :: grad1_v1b(3), grad1_u2b(3) + double precision, external :: j12_mu, j1b_nucl + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nuc(r1, grad1_v1b) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + v1b_r2 = j1b_nucl(r2) + u2b_r12 = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, grad1_u2b) + + grad1_u12_num(jpoint,ipoint,1) = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + grad1_u12_num(jpoint,ipoint,2) = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + grad1_u12_num(jpoint,ipoint,3) = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + + grad1_u12_squared_num(jpoint,ipoint) = ( grad1_u12_num(jpoint,ipoint,1) * grad1_u12_num(jpoint,ipoint,1) & + + grad1_u12_num(jpoint,ipoint,2) * grad1_u12_num(jpoint,ipoint,2) & + + grad1_u12_num(jpoint,ipoint,3) * grad1_u12_num(jpoint,ipoint,3) ) + enddo + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + +END_PROVIDER + +! --- + +double precision function j12_mu(r1, r2) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision :: mu_r12, r12 + + if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_r12 = mu_erf * r12 + + j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end function j12_mu + +! --- + +subroutine grad1_j12_mu(r1, r2, grad) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: dx, dy, dz, r12, tmp + + grad = 0.d0 + + if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + grad(1) = tmp * dx + grad(2) = tmp * dy + grad(3) = tmp * dz + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu + +! --- + +double precision function j1b_nucl(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e + + if(j1b_type .eq. 103) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl = j1b_nucl * e + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end function j1b_nucl + +! --- + +subroutine grad1_j1b_nuc(r, grad) + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: grad(3) + integer :: ipoint, i, j, phase + double precision :: x, y, z, dx, dy, dz + double precision :: a, d, e + double precision :: fact_x, fact_y, fact_z + double precision :: ax_der, ay_der, az_der, a_expo + + if(j1b_type .eq. 103) then + + x = r(1) + y = r(2) + z = r(3) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j1b_nuc + +! --- + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a6673252..2b22dab4 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -1,101 +1,5 @@ ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12: - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) - ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! - ! if J(r1,r2) = u12 x v1 x v2 - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m - double precision :: time0, time1 - double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(read_tc_integ) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") - read(11) int2_grad1_u12_ao - close(11) - - else - - if(j1b_type .eq. 3) then - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - else - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1) - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2) - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3) - enddo - enddo - enddo - int2_grad1_u12_ao *= 0.5d0 - endif - - endif - - if(write_tc_integ.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) - write(11) int2_grad1_u12_ao - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(time1) - print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0 - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -303,6 +207,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, print*, ' providing tc_grad_and_lapl_ao ...' call wall_time(time0) + PROVIDE int2_grad1_u12_ao + if(read_tc_integ) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="read") diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 901e3048..7ab5b327 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -39,7 +39,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po read(11) int2_grad1_u12_ao_test close(11) - else if(j1b_type .eq. 3) then diff --git a/src/non_h_ints_mu/numerical_integ.irp.f b/src/non_h_ints_mu/numerical_integ.irp.f index dcd7a52a..f9457247 100644 --- a/src/non_h_ints_mu/numerical_integ.irp.f +++ b/src/non_h_ints_mu/numerical_integ.irp.f @@ -322,9 +322,9 @@ double precision function num_gradu_squared_u_ij_mu(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -342,11 +342,11 @@ double precision function num_gradu_squared_u_ij_mu(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -390,9 +390,9 @@ double precision function num_grad12_j12(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -410,11 +410,11 @@ double precision function num_grad12_j12(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -456,9 +456,9 @@ double precision function num_u12sq_j1bsq(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -476,11 +476,11 @@ double precision function num_u12sq_j1bsq(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -522,9 +522,9 @@ double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -542,11 +542,11 @@ double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f new file mode 100644 index 00000000..a92eac02 --- /dev/null +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -0,0 +1,217 @@ + +! --- + + BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] +&BEGIN_PROVIDER [double precision, int2_grad1_u12_squared_ao, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! where r1 = r(ipoint) + ! + ! if J(r1,r2) = u12 (j1b_type .eq. 1) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] + ! + ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) + ! + ! + ! + ! int2_grad1_u12_squared_ao = int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_ao & int2_grad1_u12_squared_ao ...' + + PROVIDE j1b_type + + if(read_tc_integ) then + call wall_time(time0) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + endif + + if(j1b_type .eq. 3) then + + ! --- + + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + + if(.not.read_tc_integ) then + call wall_time(time0) + int2_grad1_u12_ao = 0.d0 + ! TODO OPENMP + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + enddo + enddo + enddo + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + endif + + ! --- + + call wall_time(time0) + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + int2_grad1_u12_squared_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_squared_ao =', time1-time0 + + ! --- + + elseif(j1b_type .ge. 100) then + + ! --- + + call wall_time(time0) + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_squared_num + PROVIDE grad1_u12_num + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + if(.not.read_tc_integ) then + int2_grad1_u12_ao = 0.d0 + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 1.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + enddo + ! these dgemm are equivalen to + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & + !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & + !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & + !!$OMP aos_in_r_array_extra_transp, grad1_u12_num,tmp) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do jpoint = 1, n_points_extra_final_grid + ! w = tmp(jpoint,i,j) + ! int2_grad1_u12_ao(i,j,ipoint,1) += w * grad1_u12_num(jpoint,ipoint,1) + ! int2_grad1_u12_ao(i,j,ipoint,2) += w * grad1_u12_num(jpoint,ipoint,2) + ! int2_grad1_u12_ao(i,j,ipoint,3) += w * grad1_u12_num(jpoint,ipoint,3) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + endif + + ! --- + + call wall_time(time0) + int2_grad1_u12_squared_ao = 0.d0 + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 1.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) + !! this dgemm is equivalen to + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & + !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, & + !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & + !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num,tmp) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do jpoint = 1, n_points_extra_final_grid + ! w = tmp(jpoint,i,j) + ! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_squared_ao =', time1-time0 + + ! --- + + deallocate(tmp) + + else + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + endif + + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao & int2_grad1_u12_squared_ao = ', time1 - time0 + +END_PROVIDER + +! --- + From 195c7402cf45318e22dd0800012b36a29ae3f467 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 21 Apr 2023 22:22:25 +0200 Subject: [PATCH 051/337] DGEMM added and tested for Be(VDZ) --- src/non_h_ints_mu/grad_squared.irp.f | 54 ++++++++++++------------ src/non_h_ints_mu/jast_deriv.irp.f | 31 +++++++++----- src/non_h_ints_mu/new_grad_tc.irp.f | 3 +- src/non_h_ints_mu/tc_integ.irp.f | 61 +++++++++++++++++++++++----- 4 files changed, 97 insertions(+), 52 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 558ca268..8b801f9d 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -232,37 +232,33 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g PROVIDE j1b_type - if(j1b_type .eq. 3) then - - do ipoint = 1, n_points_final_grid - tmp1 = v_1b(ipoint) - tmp1 = tmp1 * tmp1 - do j = 1, ao_num - do i = 1, ao_num - grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) - enddo + do ipoint = 1, n_points_final_grid + tmp1 = v_1b(ipoint) + tmp1 = tmp1 * tmp1 + do j = 1, ao_num + do i = 1, ao_num + grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) enddo enddo + enddo - else - - grad12_j12 = 0.d0 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - do igauss = 1, n_max_fit_slat - delta = expo_gauss_1_erf_x_2(igauss) - coef = coef_gauss_1_erf_x_2(igauss) - grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) - enddo - enddo - enddo - enddo - - endif + !if(j1b_type .eq. 0) then + ! grad12_j12 = 0.d0 + ! do ipoint = 1, n_points_final_grid + ! r(1) = final_grid_points(1,ipoint) + ! r(2) = final_grid_points(2,ipoint) + ! r(3) = final_grid_points(3,ipoint) + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do igauss = 1, n_max_fit_slat + ! delta = expo_gauss_1_erf_x_2(igauss) + ! coef = coef_gauss_1_erf_x_2(igauss) + ! grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) + ! enddo + ! enddo + ! enddo + ! enddo + !endif call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 @@ -398,7 +394,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & - , 1.d0, tc_grad_square_ao, ao_num*ao_num) + , 0.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(b_mat) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index e4a3b071..b2772f92 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -1,7 +1,7 @@ ! --- - BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] + BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] &BEGIN_PROVIDER [ double precision, grad1_u12_squared_num, (n_points_extra_final_grid, n_points_final_grid)] BEGIN_DOC @@ -32,8 +32,15 @@ double precision :: v1b_r1, v1b_r2, u2b_r12 double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz double precision, external :: j12_mu, j1b_nucl + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid ! r1 r1(1) = final_grid_points(1,ipoint) @@ -41,7 +48,7 @@ r1(3) = final_grid_points(3,ipoint) v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nuc(r1, grad1_v1b) + call grad1_j1b_nucl(r1, grad1_v1b) do jpoint = 1, n_points_extra_final_grid ! r2 @@ -53,15 +60,19 @@ u2b_r12 = j12_mu(r1, r2) call grad1_j12_mu(r1, r2, grad1_u2b) - grad1_u12_num(jpoint,ipoint,1) = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 - grad1_u12_num(jpoint,ipoint,2) = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 - grad1_u12_num(jpoint,ipoint,3) = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 - grad1_u12_squared_num(jpoint,ipoint) = ( grad1_u12_num(jpoint,ipoint,1) * grad1_u12_num(jpoint,ipoint,1) & - + grad1_u12_num(jpoint,ipoint,2) * grad1_u12_num(jpoint,ipoint,2) & - + grad1_u12_num(jpoint,ipoint,3) * grad1_u12_num(jpoint,ipoint,3) ) + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz enddo enddo + !$OMP END DO + !$OMP END PARALLEL else @@ -170,7 +181,7 @@ end function j1b_nucl ! --- -subroutine grad1_j1b_nuc(r, grad) +subroutine grad1_j1b_nucl(r, grad) implicit none double precision, intent(in) :: r(3) @@ -228,7 +239,7 @@ subroutine grad1_j1b_nuc(r, grad) endif return -end subroutine grad1_j1b_nuc +end subroutine grad1_j1b_nucl ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 2b22dab4..064eb9f1 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -247,8 +247,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) - + , 0.d0, tc_grad_and_lapl_ao, ao_num*ao_num) enddo deallocate(b_mat) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index a92eac02..865522e0 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -50,10 +50,9 @@ ! --- - PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b - if(.not.read_tc_integ) then call wall_time(time0) + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b int2_grad1_u12_ao = 0.d0 ! TODO OPENMP do ipoint = 1, n_points_final_grid @@ -133,15 +132,38 @@ do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 1.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) enddo + + !! DEBUG + !PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + !int2_grad1_u12_ao = 0.d0 + !do ipoint = 1, n_points_final_grid + ! x = final_grid_points(1,ipoint) + ! y = final_grid_points(2,ipoint) + ! z = final_grid_points(3,ipoint) + ! tmp0 = 0.5d0 * v_1b(ipoint) + ! tmp_x = v_1b_grad(1,ipoint) + ! tmp_y = v_1b_grad(2,ipoint) + ! tmp_z = v_1b_grad(3,ipoint) + ! do j = 1, ao_num + ! do i = 1, ao_num + ! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + ! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + ! int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + ! int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + ! int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + ! enddo + ! enddo + !enddo + ! these dgemm are equivalen to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_num,tmp) + !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp) !!$OMP DO SCHEDULE (static) !do ipoint = 1, n_points_final_grid ! do j = 1, ao_num @@ -165,22 +187,42 @@ call wall_time(time0) int2_grad1_u12_squared_ao = 0.d0 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 1.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) + , 0.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) + + !! DEBUG + !PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + !int2_grad1_u12_squared_ao = 0.d0 + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, ipoint) & + !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + !call wall_time(time1) + !! this dgemm is equivalen to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num,tmp) + !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) !!$OMP DO SCHEDULE (static) !do ipoint = 1, n_points_final_grid ! do j = 1, ao_num ! do i = 1, ao_num ! do jpoint = 1, n_points_extra_final_grid - ! w = tmp(jpoint,i,j) + ! w = 0.5d0 * tmp(jpoint,i,j) ! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) ! enddo ! enddo @@ -208,9 +250,6 @@ call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao & int2_grad1_u12_squared_ao = ', time1 - time0 - END_PROVIDER ! --- From e1dc8b9ec3e1212dd0b2479559081ef61811cce5 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 23 Apr 2023 21:14:17 +0200 Subject: [PATCH 052/337] added jast 104 & 105 --- src/non_h_ints_mu/jast_deriv.irp.f | 69 +++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index b2772f92..6c3f4214 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -155,7 +155,7 @@ double precision function j1b_nucl(r) implicit none double precision, intent(in) :: r(3) integer :: i - double precision :: a, d, e + double precision :: a, d, e, x, y, z if(j1b_type .eq. 103) then @@ -169,6 +169,29 @@ double precision function j1b_nucl(r) j1b_nucl = j1b_nucl * e enddo + elseif(j1b_type .eq. 104) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*d) + enddo + + elseif(j1b_type .eq. 105) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl = j1b_nucl - dexp(-a*d*d) + enddo + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -231,6 +254,50 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = fact_y grad(3) = fact_z + else if(j1b_type .eq. 104) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * dexp(-a*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = -2.d0 * fact_x + grad(2) = -2.d0 * fact_y + grad(3) = -2.d0 * fact_z + + else if(j1b_type .eq. 105) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * d * dexp(-a*d*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = -4.d0 * fact_x + grad(2) = -4.d0 * fact_y + grad(3) = -4.d0 * fact_z + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' From 5fb6ed0180090e1d7d5cd009ce0e3588815670a3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 00:50:07 +0200 Subject: [PATCH 053/337] Added JSON in FCI --- src/cipsi/NEED | 1 + src/cipsi/cipsi.irp.f | 10 +-- src/cipsi/stochastic_cipsi.irp.f | 8 +-- src/cipsi/write_cipsi_json.irp.f | 53 ++++++++++++++ src/fci/fci.irp.f | 7 ++ src/hartree_fock/scf.irp.f | 7 +- src/iterations/EZFIO.cfg | 24 ------- src/iterations/io.irp.f | 37 ---------- src/iterations/iterations.irp.f | 91 +++++++++++++++--------- src/iterations/print_extrapolation.irp.f | 14 ++-- src/json/json_formats.irp.f | 20 ++++++ src/scf_utils/roothaan_hall_scf.irp.f | 6 +- 12 files changed, 162 insertions(+), 116 deletions(-) create mode 100644 src/cipsi/write_cipsi_json.irp.f delete mode 100644 src/iterations/EZFIO.cfg delete mode 100644 src/iterations/io.irp.f diff --git a/src/cipsi/NEED b/src/cipsi/NEED index 5bd742bc..89c128ec 100644 --- a/src/cipsi/NEED +++ b/src/cipsi/NEED @@ -1,3 +1,4 @@ +json perturbation zmq mpi diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 5225c6df..88aaeae0 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -16,7 +16,6 @@ subroutine run_cipsi double precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -76,7 +75,6 @@ subroutine run_cipsi ) write(*,'(A)') '--------------------------------------------------------------------------------' - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) if (do_pt2) then @@ -106,10 +104,10 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() - N_iter += 1 + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -155,11 +153,13 @@ subroutine run_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep(1:N_states), & pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) end + diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 35e80eb8..b83e658a 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -15,7 +15,6 @@ subroutine run_stochastic_cipsi double precision, external :: memory_of_double PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map - N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators @@ -96,10 +95,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() - N_iter += 1 + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit @@ -135,9 +134,10 @@ subroutine run_stochastic_cipsi call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) call print_summary(psi_energy_with_nucl_rep, & pt2_data , pt2_data_err, N_det, N_configuration, N_states, psi_s2) - call save_iterations(psi_energy_with_nucl_rep(1:N_states),pt2_data % rpt2,N_det) + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) endif call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/write_cipsi_json.irp.f b/src/cipsi/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index 9d9c0b7d..bb2a93f8 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -39,12 +39,19 @@ program fci if (.not.is_zmq_slave) then PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + write(json_unit,json_array_open_fmt) 'fci' + if (do_pt2) then call run_stochastic_cipsi else call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_two_e_integrals_in_map pt2_min_parallel_tasks diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index d22b11ab..a361c04f 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -80,15 +80,14 @@ subroutine run mo_label = 'Orthonormalized' - write(json_unit,*) '"scf" : [' + write(json_unit,json_array_open_fmt) 'scf' call Roothaan_Hall_SCF - call ezfio_set_hartree_fock_energy(SCF_energy) - - write(json_unit,*) ']' + write(json_unit,json_array_close_fmtx) call json_close + call ezfio_set_hartree_fock_energy(SCF_energy) end diff --git a/src/iterations/EZFIO.cfg b/src/iterations/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations/io.irp.f b/src/iterations/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations/io.irp.f +++ /dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations/iterations.irp.f b/src/iterations/iterations.irp.f index 2c9faaf8..d06d1b6e 100644 --- a/src/iterations/iterations.irp.f +++ b/src/iterations/iterations.irp.f @@ -1,42 +1,65 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC - integer :: i - do i=1,min(N_states,N_det) - call extrapolate_data(N_iter, & - energy_iterations(i,1:N_iter), & - pt2_iterations(i,1:N_iter), & - extrapolated_energy(1:N_iter,i)) - enddo -END_PROVIDER - - -subroutine save_iterations(e_, pt2_,n_) +BEGIN_PROVIDER [ integer, N_iter ] implicit none BEGIN_DOC -! Update the energy in the EZFIO file. +! Number of CIPSI iterations END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) + N_iter = 0 +END_PROVIDER + +BEGIN_PROVIDER [ integer, N_iter_max ] + implicit none + BEGIN_DOC + ! Max number of iterations for extrapolations + END_DOC + N_iter_max = 8 +END_PROVIDER + + BEGIN_PROVIDER [ double precision, energy_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, pt2_iterations , (n_states,N_iter_max) ] +&BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter_max,N_states) ] + implicit none + BEGIN_DOC +! The energy at each iteration for the extrapolations + END_DOC + + energy_iterations = 0.d0 + pt2_iterations = 0.d0 + extrapolated_energy = 0.d0 +END_PROVIDER + +subroutine increment_n_iter(e, pt2_data) + use selection_types + implicit none + BEGIN_DOC +! Does what is necessary to increment n_iter + END_DOC + double precision, intent(in) :: e(*) + type(pt2_type), intent(in) :: pt2_data + integer :: k, i + + if (N_det < N_states) return + + if (N_iter < N_iter_max) then + N_iter += 1 + else + do k=2,N_iter + energy_iterations(1:N_states,k-1) = energy_iterations(1:N_states,k) + pt2_iterations(1:N_states,k-1) = pt2_iterations(1:N_states,k) enddo - N_iter = N_iter-1 - TOUCH N_iter endif + energy_iterations(1:N_states,N_iter) = e(1:N_states) + pt2_iterations(1:N_states,N_iter) = pt2_data % rpt2(1:N_states) - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) + if (N_iter < 2) then + extrapolated_energy(1,:) = energy_iterations(:,1) + pt2_iterations(:,1) + extrapolated_energy(2,:) = energy_iterations(:,2) + pt2_iterations(:,2) + else + do i=1,N_states + call extrapolate_data(N_iter, & + energy_iterations(i,1:N_iter), & + pt2_iterations(i,1:N_iter), & + extrapolated_energy(1:N_iter,i)) + enddo + endif end - diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index cb46fb67..111429bf 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -5,10 +5,14 @@ subroutine print_extrapolated_energy END_DOC integer :: i,k + integer :: N_states_p, N_iter_p if (N_iter< 2) then return endif + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter, 8) + write(*,'(A)') '' write(*,'(A)') 'Extrapolated energies' write(*,'(A)') '------------------------' @@ -20,20 +24,20 @@ subroutine print_extrapolated_energy write(*,*) '=========== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) + do k=2,N_iter_p + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' - do i=2, min(N_states,N_det) + do i=2, N_states_p print *, '' print *, 'State ', i print *, '' write(*,*) '=========== ', '=================== ', '=================== ', '===================' write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & + do k=2,N_iter_p + write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,k), extrapolated_energy(k,i), & extrapolated_energy(k,i) - extrapolated_energy(k,1), & (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 enddo diff --git a/src/json/json_formats.irp.f b/src/json/json_formats.irp.f index 14a8f014..773114ba 100644 --- a/src/json/json_formats.irp.f +++ b/src/json/json_formats.irp.f @@ -8,6 +8,16 @@ &BEGIN_PROVIDER [ character*(64), json_true_fmtx ] &BEGIN_PROVIDER [ character*(64), json_false_fmt ] &BEGIN_PROVIDER [ character*(64), json_false_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_array_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_array_close_fmtx ] +&BEGIN_PROVIDER [ character*(64), json_dict_open_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_uopen_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmt ] +&BEGIN_PROVIDER [ character*(64), json_dict_close_fmtx ] implicit none BEGIN_DOC ! Formats for JSON output. @@ -23,4 +33,14 @@ json_true_fmtx = '('' "'',A,''": true'')' json_false_fmt = '('' "'',A,''": false,'')' json_false_fmtx = '('' "'',A,''": false'')' + json_array_open_fmt = '('' "'',A,''": ['')' + json_array_uopen_fmt = '('' ['')' + json_array_close_fmt = '('' ],'')' + json_array_close_uopen_fmt = '('' ], ['')' + json_array_close_fmtx = '('' ]'')' + json_dict_open_fmt = '('' "'',A,''": {'')' + json_dict_uopen_fmt = '('' {'')' + json_dict_close_fmt = '('' },'')' + json_dict_close_uopen_fmt = '('' }, {'')' + json_dict_close_fmtx = '('' }'')' END_PROVIDER diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index 08fe7acf..cf006035 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -155,9 +155,9 @@ END_DOC call lock_io if (iteration_SCF == 1) then - write(json_unit, *) '{' + write(json_unit, json_dict_uopen_fmt) else - write(json_unit, *) '}, {' + write(json_unit, json_dict_close_uopen_fmt) endif write(json_unit, json_int_fmt) 'iteration', iteration_SCF write(json_unit, json_real_fmt) 'energy', energy_SCF @@ -185,7 +185,7 @@ END_DOC if (qp_stop()) exit enddo - write(json_unit, *) '}' + write(json_unit, json_dict_close_fmtx) if (iteration_SCF < n_it_SCF_max) then mo_label = 'Canonical' From dd5291d90dca05f50976b10d42d9da5657641058 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:01:31 +0200 Subject: [PATCH 054/337] Added exc_energy_error.py --- scripts/exc_energy_error.py | 186 ++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100755 scripts/exc_energy_error.py diff --git a/scripts/exc_energy_error.py b/scripts/exc_energy_error.py new file mode 100755 index 00000000..ba9d7917 --- /dev/null +++ b/scripts/exc_energy_error.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +# Computes the error on the excitation energy of a CIPSI run. + +def student(p,df): + import scipy + from scipy.stats import t + return t.ppf(p, df) + + +def chi2cdf(x,k): + import scipy + import scipy.stats + return scipy.stats.chi2.cdf(x,k) + + +def jarque_bera(data): + + n = max(len(data), 2) + norm = 1./ sum( [ w for (_,w) in data ] ) + + mu = sum( [ w* x for (x,w) in data ] ) * norm + sigma2 = sum( [ w*(x-mu)**2 for (x,w) in data ] ) * norm + if sigma2 > 0.: + S = ( sum( [ w*(x-mu)**3 for (x,w) in data ] ) * norm ) / sigma2**(3./2.) + K = ( sum( [ w*(x-mu)**4 for (x,w) in data ] ) * norm ) / sigma2**2 + else: + S = 0. + K = 0. + + # Value of the Jarque-Bera test + JB = n/6. * (S**2 + 1./4. * (K-3.)**2) + + # Probability that the data comes from a Gaussian distribution + p = 1. - chi2cdf(JB,2) + + return JB, mu, sqrt(sigma2/(n-1)), p + + + +to_eV = 27.2107362681 +import sys, os +import scipy +import scipy.stats +from math import sqrt, gamma, exp +import json + + +def read_data(filename,state): + """ Read energies and PT2 from input file """ + with open(filename,'r') as f: + lines = json.load(f)['fci'] + + print(f"State: {state}") + + gs = [] + es = [] + + for l in lines: + try: + pt2_0 = l['states'][0]['pt2'] + e_0 = l['states'][0]['energy'] + pt2_1 = l['states'][state]['pt2'] + e_1 = l['states'][state]['energy'] + gs.append( (e_0, pt2_0) ) + es.append( (e_1, pt2_1) ) + except: pass + + def f(p_1, p0, p1): + e, pt2 = p0 + y0, x0 = p_1 + y1, x1 = p1 + try: + alpha = (y1-y0)/(x0-x1) + except ZeroDivisionError: + alpha = 1. + return [e, pt2, alpha] + + for l in (gs, es): + p_1, p0, p1 = l[0], l[0], l[1] + l[0] = f(p_1, p0, p1) + + for i in range(1,len(l)-1): + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[i+1] + l[i] = f(p_1, p0, p1) + + i = len(l)-1 + p_1 = (l[i-1][0], l[i-1][1]) + p0 = l[i] + p1 = l[-1] + l[i] = f(p_1, p0, p1) + + return [ x+y for x,y in zip(gs,es) ] + + +def compute(data): + + d = [] + for e0, p0, a0, e1, p1, a1 in data: + x = (e1+p1)-(e0+p0) + w = 1./sqrt(p0**2 + p1**2) + bias = (a1-1.)*p1 - (a0-1.)*p0 + d.append( (x,w,bias) ) + + x = [] + target = (scipy.stats.norm.cdf(1.)-0.5)*2 # = 0.6827 + + print("| %2s | %8s | %8s | %8s | %8s | %8s |"%( "N", "DE", "+/-", "bias", "P(G)", "J")) + print("|----+----------+----------+----------+----------+----------|") + xmax = (0.,0.,0.,0.,0.,0,0.) + for i in range(len(data)-1): + jb, mu, sigma, p = jarque_bera( [ (x,w) for (x,w,bias) in d[i:] ] ) + bias = sum ( [ w * e for (_,w,e) in d[i:] ] ) / sum ( [ w for (_,w,_) in d[i:] ] ) + mu = (mu+0.5*bias) * to_eV + sigma = sigma * to_eV + bias = bias * to_eV + n = len(data[i:]) + beta = student(0.5*(1.+target/p) ,n) + err = sigma * beta + 0.5*abs(bias) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |"%( n, mu, err, bias, p, jb)) + if n < 3 : + continue + y = (err, p, mu, err, jb,n,bias) + if p > xmax[1]: xmax = y + if p < 0.8: + continue + x.append(y) + + x = sorted(x) + + print("|----+----------+----------+----------+----------+----------|") + if x != []: + xmax = x[0] + _, p, mu, err, jb, n, bias = xmax + beta = student(0.5*(1.+target/p),n) + print("| %2d | %8.3f | %8.3f | %8.3f | %8.3f | %8.3f |\n"%(n, mu, err, bias, p, jb)) + + return mu, err, bias, p + +filename = sys.argv[1] +print(filename) +if len(sys.argv) > 2: + state = int(sys.argv[2]) +else: + state = 1 +data = read_data(filename,state) +mu, err, bias, _ = compute(data) +print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err)) + +import numpy as np +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ] ] ) +E0 = np.linalg.solve(A,B)[1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ] ] ) +E1 = np.linalg.solve(A,B)[1] +average_2 = (E1-E0)*to_eV + +A = np.array( [ [ data[-1][1], 1. ], + [ data[-2][1], 1. ], + [ data[-3][1], 1. ] ] ) +B = np.array( [ [ data[-1][0] ], + [ data[-2][0] ], + [ data[-3][0] ] ] ) +E0 = np.linalg.lstsq(A,B,rcond=None)[0][1] +A = np.array( [ [ data[-1][4], 1. ], + [ data[-2][4], 1. ], + [ data[-3][4], 1. ] ] ) +B = np.array( [ [ data[-1][3] ], + [ data[-2][3] ], + [ data[-3][3] ] ] ) +E1 = np.linalg.lstsq(A,B,rcond=None)[0][1] +average_3 = (E1-E0)*to_eV + +exc = ((data[-1][3] + data[-1][4]) - (data[-1][0] + data[-1][1])) * to_eV +error_2 = abs(average_2 - average_3) +error_3 = abs(average_3 - exc) +print(" 2-3 points: %.3f +/- %.3f "% (average_3, error_2)) +print(" largest wf: %.3f +/- %.3f "%(average_3, error_3)) + + From 918839fbf6636c36ae9752b2d029ff5f31d157d3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:22:24 +0200 Subject: [PATCH 055/337] Added JSON in FCI_TC --- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 10 +++++++++- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 6 ++++++ src/tc_bi_ortho/h_tc_s2_u0.irp.f | 20 ++++++++++---------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 1f4fe849..284a1e2e 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -94,7 +94,15 @@ subroutine run_stochastic_cipsi call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop - N_iter += 1 + call print_summary(psi_energy_with_nucl_rep, & + pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) + + call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) + + call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) + call print_extrapolated_energy() + call print_mol_properties() + call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index 84ac8166..ed75c882 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -62,6 +62,7 @@ subroutine run_cipsi_tc endif endif ! --- + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then call run_stochastic_cipsi @@ -69,6 +70,11 @@ subroutine run_cipsi_tc call run_cipsi endif + write(json_unit,json_dict_uopen_fmt) + write(json_unit,json_dict_close_fmtx) + write(json_unit,json_array_close_fmtx) + call json_close + else PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks if(elec_alpha_num+elec_beta_num.ge.3)then diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 30b0f273..b9b85a96 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -12,9 +12,9 @@ subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC integer, intent(in) :: N_st,sze - double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(inout) :: l_0(sze,N_st), r_0(sze,N_st) double precision, intent(out) :: energies(N_st), s2(N_st) - logical :: do_right + logical :: do_right integer :: istate double precision, allocatable :: s_0(:,:), v_0(:,:) double precision :: u_dot_v, norm @@ -40,7 +40,7 @@ subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + logical :: do_right do_right = .True. call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end @@ -57,7 +57,7 @@ subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + logical :: do_right do_right = .False. call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end @@ -77,7 +77,7 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) END_DOC integer, intent(in) :: N_st,sze double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical, intent(in) :: do_right + logical, intent(in) :: do_right integer :: k double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t @@ -124,7 +124,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishi use bitmasks implicit none BEGIN_DOC - ! Computes $v_t = H | u_t\rangle$ + ! Computes $v_t = H | u_t\rangle$ ! ! Default should be 1,N_det,0,1 ! @@ -132,7 +132,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishi END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - logical, intent(in) :: do_right + logical, intent(in) :: do_right double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) @@ -165,7 +165,7 @@ subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,ie END_DOC integer, intent(in) :: N_st,sze,istart,iend,ishift,istep double precision, intent(in) :: u_t(N_st,N_det) - logical, intent(in) :: do_right + logical, intent(in) :: do_right double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) double precision :: hij, sij @@ -542,7 +542,7 @@ compute_singles=.True. lrow = psi_bilinear_matrix_rows(l_a) ASSERT (lrow <= N_det_alpha_unique) - tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) ! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) if(do_right)then @@ -693,7 +693,7 @@ compute_singles=.True. lcol = psi_bilinear_matrix_transp_columns(l_b) ASSERT (lcol <= N_det_beta_unique) - tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) ! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) if(do_right)then From 54a88fe4caf88dc8be032d922fe87c0b250deab5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:32:05 +0200 Subject: [PATCH 056/337] Added JSON to fci_tc_bi --- src/cipsi_tc_bi_ortho/NEED | 3 +- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 2 +- src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f | 53 ++++++++++ src/fci_tc_bi/NEED | 1 + src/fci_tc_bi/save_energy.irp.f | 4 +- src/iterations_tc/EZFIO.cfg | 24 ----- src/iterations_tc/NEED | 0 src/iterations_tc/io.irp.f | 37 ------- src/iterations_tc/iterations.irp.f | 43 -------- src/iterations_tc/print_extrapolation.irp.f | 46 -------- src/iterations_tc/print_summary.irp.f | 104 ------------------- 11 files changed, 59 insertions(+), 258 deletions(-) create mode 100644 src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f delete mode 100644 src/iterations_tc/EZFIO.cfg delete mode 100644 src/iterations_tc/NEED delete mode 100644 src/iterations_tc/io.irp.f delete mode 100644 src/iterations_tc/iterations.irp.f delete mode 100644 src/iterations_tc/print_extrapolation.irp.f delete mode 100644 src/iterations_tc/print_summary.irp.f diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED index 4dd1af36..8f05be69 100644 --- a/src/cipsi_tc_bi_ortho/NEED +++ b/src/cipsi_tc_bi_ortho/NEED @@ -1,6 +1,7 @@ +json mpi perturbation zmq -iterations_tc +iterations csf tc_bi_ortho diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index 284a1e2e..a06f28e9 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -101,7 +101,7 @@ subroutine run_stochastic_cipsi call increment_n_iter(psi_energy_with_nucl_rep, pt2_data) call print_extrapolated_energy() - call print_mol_properties() +! call print_mol_properties() call write_cipsi_json(pt2_data,pt2_data_err) if (qp_stop()) exit diff --git a/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f new file mode 100644 index 00000000..98a402a2 --- /dev/null +++ b/src/cipsi_tc_bi_ortho/write_cipsi_json.irp.f @@ -0,0 +1,53 @@ +subroutine write_cipsi_json(pt2_data, pt2_data_err) + use selection_types + implicit none + BEGIN_DOC +! Writes JSON data for CIPSI runs + END_DOC + type(pt2_type), intent(in) :: pt2_data, pt2_data_err + integer :: i,j,k + + call lock_io + character*(64), allocatable :: fmtk(:) + integer :: N_states_p, N_iter_p + N_states_p = min(N_states,N_det) + N_iter_p = min(N_iter,8) + allocate(fmtk(0:N_iter_p)) + fmtk(:) = '('' '',E22.15,'','')' + fmtk(N_iter_p) = '('' '',E22.15)' + + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_int_fmt) 'n_det', N_det + if (s2_eig) then + write(json_unit, json_int_fmt) 'n_cfg', N_configuration + if (only_expected_s2) then + write(json_unit, json_int_fmt) 'n_csf', N_csf + endif + endif + write(json_unit, json_array_open_fmt) 'states' + do k=1,N_states_p + write(json_unit, json_dict_uopen_fmt) + write(json_unit, json_real_fmt) 'energy', psi_energy_with_nucl_rep(k) + write(json_unit, json_real_fmt) 's2', psi_s2(k) + write(json_unit, json_real_fmt) 'pt2', pt2_data % pt2(k) + write(json_unit, json_real_fmt) 'pt2_err', pt2_data_err % pt2(k) + write(json_unit, json_real_fmt) 'rpt2', pt2_data % rpt2(k) + write(json_unit, json_real_fmt) 'rpt2_err', pt2_data_err % rpt2(k) + write(json_unit, json_real_fmt) 'variance', pt2_data % variance(k) + write(json_unit, json_real_fmt) 'variance_err', pt2_data_err % variance(k) + write(json_unit, json_array_open_fmt) 'ex_energy' + do i=2,N_iter_p + write(json_unit, fmtk(i)) extrapolated_energy(i,k) + enddo + write(json_unit, json_array_close_fmtx) + if (k < N_states_p) then + write(json_unit, json_dict_close_fmt) + else + write(json_unit, json_dict_close_fmtx) + endif + enddo + write(json_unit, json_array_close_fmtx) + write(json_unit, json_dict_close_fmt) + deallocate(fmtk) + call unlock_io +end diff --git a/src/fci_tc_bi/NEED b/src/fci_tc_bi/NEED index 000b0deb..3bb9515a 100644 --- a/src/fci_tc_bi/NEED +++ b/src/fci_tc_bi/NEED @@ -1,3 +1,4 @@ +json tc_bi_ortho davidson_undressed cipsi_tc_bi_ortho diff --git a/src/fci_tc_bi/save_energy.irp.f b/src/fci_tc_bi/save_energy.irp.f index 7c41d00f..421ae5f8 100644 --- a/src/fci_tc_bi/save_energy.irp.f +++ b/src/fci_tc_bi/save_energy.irp.f @@ -4,6 +4,6 @@ subroutine save_energy(E,pt2) ! Saves the energy in |EZFIO|. END_DOC double precision, intent(in) :: E(N_states), pt2(N_states) - call ezfio_set_fci_tc_energy(E(1:N_states)) - call ezfio_set_fci_tc_energy_pt2(E(1:N_states)+pt2(1:N_states)) + call ezfio_set_fci_tc_bi_energy(E(1:N_states)) + call ezfio_set_fci_tc_bi_energy_pt2(E(1:N_states)+pt2(1:N_states)) end diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg deleted file mode 100644 index 2a5e94a7..00000000 --- a/src/iterations_tc/EZFIO.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[n_iter] -interface: ezfio -doc: Number of saved iterations -type:integer -default: 1 - -[n_det_iterations] -interface: ezfio, provider -doc: Number of determinants at each iteration -type: integer -size: (100) - -[energy_iterations] -interface: ezfio, provider -doc: The variational energy at each iteration -type: double precision -size: (determinants.n_states,100) - -[pt2_iterations] -interface: ezfio, provider -doc: The |PT2| correction at each iteration -type: double precision -size: (determinants.n_states,100) - diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED deleted file mode 100644 index e69de29b..00000000 diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f deleted file mode 100644 index 821f5e84..00000000 --- a/src/iterations_tc/io.irp.f +++ /dev/null @@ -1,37 +0,0 @@ -BEGIN_PROVIDER [ integer, n_iter ] - implicit none - BEGIN_DOC -! number of iterations - END_DOC - - logical :: has - PROVIDE ezfio_filename - if (mpi_master) then - - double precision :: zeros(N_states,100) - integer :: izeros(100) - zeros = 0.d0 - izeros = 0 - call ezfio_set_iterations_n_iter(0) - call ezfio_set_iterations_energy_iterations(zeros) - call ezfio_set_iterations_pt2_iterations(zeros) - call ezfio_set_iterations_n_det_iterations(izeros) - n_iter = 1 - endif - IRP_IF MPI_DEBUG - print *, irp_here, mpi_rank - call MPI_BARRIER(MPI_COMM_WORLD, ierr) - IRP_ENDIF - IRP_IF MPI - include 'mpif.h' - integer :: ierr - call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) - if (ierr /= MPI_SUCCESS) then - stop 'Unable to read n_iter with MPI' - endif - IRP_ENDIF - - call write_time(6) - -END_PROVIDER - diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f deleted file mode 100644 index 2f1cf0c1..00000000 --- a/src/iterations_tc/iterations.irp.f +++ /dev/null @@ -1,43 +0,0 @@ -BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ] - implicit none - BEGIN_DOC - ! Extrapolated energy, using E_var = f(PT2) where PT2=0 - END_DOC -! integer :: i - extrapolated_energy = 0.D0 -END_PROVIDER - - subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy) - implicit none - integer, intent(in) :: Niter - double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter) - call extrapolate_data(Niter,ept2,pt1,extrap_energy) - end - -subroutine save_iterations(e_, pt2_,n_) - implicit none - BEGIN_DOC -! Update the energy in the EZFIO file. - END_DOC - integer, intent(in) :: n_ - double precision, intent(in) :: e_(N_states), pt2_(N_states) - integer :: i - - if (N_iter == 101) then - do i=2,N_iter-1 - energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter) - pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) - enddo - N_iter = N_iter-1 - TOUCH N_iter - endif - - energy_iterations(1:N_states,N_iter) = e_(1:N_states) - pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states) - n_det_iterations(N_iter) = n_ - call ezfio_set_iterations_N_iter(N_iter) - call ezfio_set_iterations_energy_iterations(energy_iterations) - call ezfio_set_iterations_pt2_iterations(pt2_iterations) - call ezfio_set_iterations_n_det_iterations(n_det_iterations) -end - diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f deleted file mode 100644 index cb46fb67..00000000 --- a/src/iterations_tc/print_extrapolation.irp.f +++ /dev/null @@ -1,46 +0,0 @@ -subroutine print_extrapolated_energy - implicit none - BEGIN_DOC -! Print the extrapolated energy in the output - END_DOC - - integer :: i,k - - if (N_iter< 2) then - return - endif - write(*,'(A)') '' - write(*,'(A)') 'Extrapolated energies' - write(*,'(A)') '------------------------' - write(*,'(A)') '' - - print *, '' - print *, 'State ', 1 - print *, '' - write(*,*) '=========== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy' - write(*,*) '=========== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1) - enddo - write(*,*) '=========== ', '===================' - - do i=2, min(N_states,N_det) - print *, '' - print *, 'State ', i - print *, '' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - do k=2,min(N_iter,8) - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), & - extrapolated_energy(k,i) - extrapolated_energy(k,1), & - (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 - enddo - write(*,*) '=========== ', '=================== ', '=================== ', '===================' - enddo - - print *, '' - -end subroutine - diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f deleted file mode 100644 index 8e6285e2..00000000 --- a/src/iterations_tc/print_summary.irp.f +++ /dev/null @@ -1,104 +0,0 @@ -subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_) - use selection_types - implicit none - BEGIN_DOC -! Print the extrapolated energy in the output - END_DOC - - integer, intent(in) :: n_det_, n_configuration_, n_st - double precision, intent(in) :: e_(n_st), s2_(n_st) - type(pt2_type) , intent(in) :: pt2_data, pt2_data_err - integer :: i, k - integer :: N_states_p - character*(9) :: pt2_string - character*(512) :: fmt - - if (do_pt2) then - pt2_string = ' ' - else - pt2_string = '(approx)' - endif - - N_states_p = min(N_det_,n_st) - - print *, '' - print '(A,I12)', 'Summary at N_det = ', N_det_ - print '(A)', '-----------------------------------' - print *, '' - - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))' - write(*,fmt) ('State',k, k=1,N_states_p) - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))' - write(*,fmt) '# E ', e_(1:N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1) - write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0 - endif - write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))' - write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p) - write(*,'(A)') '#' - write(*,fmt) '# E+PT2 ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p) - write(*,fmt) '# E+rPT2 ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p) - if (N_states_p > 1) then - write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p) - write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, & - dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p) - endif - write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))' - write(*,fmt) - print *, '' - - print *, 'N_det = ', N_det_ - print *, 'N_states = ', n_st - if (s2_eig) then - print *, 'N_cfg = ', N_configuration_ - if (only_expected_s2) then - print *, 'N_csf = ', N_csf - endif - endif - print *, '' - - do k=1, N_states_p - print*,'* State ',k - print *, '< S^2 > = ', s2_(k) - print *, 'E = ', e_(k) - print *, 'Variance = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k) - print *, 'PT norm = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k)) - print *, 'PT2 = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'rPT2 = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, 'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k) - print *, 'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k) - print *, '' - enddo - - print *, '-----' - if(n_st.gt.1)then - print *, 'Variational Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i) - e_(1)), & - (e_(i) - e_(1)) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), & - (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0 - enddo - print *, '-----' - print*, 'Variational + renormalized perturbative Energy difference (au | eV)' - do i=2, N_states_p - print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), & - (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0 - enddo - endif - -! call print_energy_components() - -end subroutine - From 67902d437776d5caeb4f9292796baafaa9ca66fe Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 01:36:49 +0200 Subject: [PATCH 057/337] Removed print_e_conv: should be replaced by python script --- src/tools/print_e_conv.irp.f | 80 ------------------------------------ 1 file changed, 80 deletions(-) delete mode 100644 src/tools/print_e_conv.irp.f diff --git a/src/tools/print_e_conv.irp.f b/src/tools/print_e_conv.irp.f deleted file mode 100644 index e358ebc1..00000000 --- a/src/tools/print_e_conv.irp.f +++ /dev/null @@ -1,80 +0,0 @@ -program print_e_conv - implicit none - BEGIN_DOC -! program that prints in a human readable format the convergence of the CIPSI algorithm. -! -! for all istate, this program produces -! -! * a file "EZFIO.istate.conv" containing the variational and var+PT2 energies as a function of N_det -! -! * for istate > 1, a file EZFIO.istate.delta_e.conv containing the energy difference (both var and var+PT2) with the ground state as a function of N_det - END_DOC - - provide ezfio_filename - call routine_e_conv - end - -subroutine routine_e_conv - implicit none - BEGIN_DOC -! routine called by :c:func:`print_e_conv` - END_DOC - integer :: N_iter_tmp - integer :: i,istate - character*(128) :: output - integer :: i_unit_output,getUnitAndOpen - character*(128) :: filename - - integer, allocatable :: n_det_tmp(:) - call ezfio_get_iterations_N_iter(N_iter_tmp) - print*,'N_iter_tmp = ',N_iter_tmp - double precision, allocatable :: e(:,:),pt2(:,:) - allocate(e(N_states, 100),pt2(N_states, 100),n_det_tmp(100)) - call ezfio_get_iterations_energy_iterations(e) - call ezfio_get_iterations_pt2_iterations(pt2) - call ezfio_get_iterations_n_det_iterations(n_det_tmp) - - - do istate = 1, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - print*,filename - output=trim(ezfio_filename)//'.'//trim(filename)//'.conv' - output=trim(output) - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det E_var E_var + PT2' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,3(F16.10,X))')n_det_tmp(i),e(istate,i),e(istate,i) + pt2(istate,i) - enddo - enddo - - if(N_states.gt.1)then - double precision, allocatable :: deltae(:,:),deltae_pt2(:,:) - allocate(deltae(N_states,100),deltae_pt2(N_states,100)) - do i = 1, N_iter_tmp - do istate = 1, N_states - deltae(istate,i) = e(istate,i) - e(1,i) - deltae_pt2(istate,i) = e(istate,i) + pt2(istate,i) - (e(1,i) + pt2(1,i)) - enddo - enddo - do istate = 2, N_states - if (istate.lt.10)then - write (filename, "(I1)")istate - else - write (filename, "(I2)")istate - endif - output=trim(ezfio_filename)//'.'//trim(filename)//'.delta_e.conv' - print*,'output = ',trim(output) - i_unit_output = getUnitAndOpen(output,'w') - write(i_unit_output,*)'# N_det Delta E_var Delta (E_var + PT2)' - do i = 1, N_iter_tmp - write(i_unit_output,'(I9,X,100(F16.10,X))')n_det_tmp(i),deltae(istate,i),deltae_pt2(istate,i) - enddo - enddo - endif - -end From 64bfddbb00e50d46bbae11c2cc895435d298bcd0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 24 Apr 2023 10:48:21 +0200 Subject: [PATCH 058/337] Renamed scripts/exc_energy_error.py scripts/qp_exc_energy.py --- scripts/{exc_energy_error.py => qp_exc_energy.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{exc_energy_error.py => qp_exc_energy.py} (100%) diff --git a/scripts/exc_energy_error.py b/scripts/qp_exc_energy.py similarity index 100% rename from scripts/exc_energy_error.py rename to scripts/qp_exc_energy.py From 207e52d2202bca52eebc0ba8f394150a9bd98b48 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 26 Apr 2023 08:52:06 +0200 Subject: [PATCH 059/337] fixed bug in TC integrals --- src/bi_ort_ints/one_e_bi_ort.irp.f | 17 +- src/bi_ort_ints/semi_num_ints_mo.irp.f | 86 ++++--- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 21 +- src/bi_ort_ints/total_twoe_pot.irp.f | 3 + src/bi_ortho_mos/mos_rl.irp.f | 4 + src/non_h_ints_mu/grad_squared.irp.f | 8 +- src/non_h_ints_mu/new_grad_tc.irp.f | 139 ++++++----- src/non_h_ints_mu/tc_integ.irp.f | 246 +++++++++---------- src/non_h_ints_mu/total_tc_int.irp.f | 30 ++- src/tc_scf/diis_tcscf.irp.f | 8 + src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 14 +- src/tc_scf/fock_tc.irp.f | 14 +- src/tc_scf/fock_tc_mo_tot.irp.f | 218 ++++++++-------- src/tc_scf/fock_three_bi_ortho.irp.f | 8 +- src/tc_scf/tc_scf_energy.irp.f | 1 + 15 files changed, 453 insertions(+), 364 deletions(-) diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 7f89899b..b0455570 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -11,14 +11,17 @@ BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)] provide j1b_type if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then + + print *, ' do things properly !' + stop - do i = 1, ao_num - do j = 1, ao_num - ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & - + j1b_gauss_hermII (j,i) & - + j1b_gauss_nonherm(j,i) ) - enddo - enddo + !do i = 1, ao_num + ! do j = 1, ao_num + ! ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & + ! + j1b_gauss_hermII (j,i) & + ! + j1b_gauss_nonherm(j,i) ) + ! enddo + !enddo endif diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 4694a998..0d727785 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -110,27 +110,36 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, print *, ' providing int2_grad1_u12_ao_transp ...' call wall_time(wall0) - if(test_cycle_tc)then - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) - enddo - enddo - enddo + if(test_cycle_tc) then + + PROVIDE int2_grad1_u12_ao_test + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) + enddo + enddo + enddo + else - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) - enddo - enddo - enddo + + PROVIDE int2_grad1_u12_ao + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) + enddo + enddo + enddo + endif + call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 @@ -144,9 +153,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, integer :: ipoint double precision :: wall0, wall1 - !print *, ' providing int2_grad1_u12_bimo_transp' + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_ao_transp + + !print *, ' providing int2_grad1_u12_bimo_transp' + !call wall_time(wall0) - call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & @@ -163,25 +175,31 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) + !call wall_time(wall1) !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )] - implicit none - integer :: i, j, ipoint - do ipoint = 1, n_points_final_grid - do i = 1, mo_num - do j = 1, mo_num - int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) - int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) - int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) - enddo - enddo - enddo +BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] + + implicit none + integer :: i, j, ipoint + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_transp + + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) + enddo + enddo + enddo + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 48fa84f7..e8b56307 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -81,21 +81,24 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer :: ipoint double precision :: weight + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_t + integral = 0.d0 do ipoint = 1, n_points_final_grid weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & - * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & - + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & - * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & - * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) enddo diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index bdebe890..721ea0c8 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -20,6 +20,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_vartc_tot, (ao_num, ao_num, ao_num, a enddo END_PROVIDER + ! --- BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_num) ] @@ -66,6 +67,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n else + PROVIDE ao_tc_int_chemist + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f index d51999fc..c69309d1 100644 --- a/src/bi_ortho_mos/mos_rl.irp.f +++ b/src/bi_ortho_mos/mos_rl.irp.f @@ -17,6 +17,8 @@ subroutine ao_to_mo_bi_ortho(A_ao, LDA_ao, A_mo, LDA_mo) double precision, intent(out) :: A_mo(LDA_mo,mo_num) double precision, allocatable :: T(:,:) + PROVIDE mo_l_coef mo_r_coef + allocate ( T(ao_num,mo_num) ) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T @@ -54,6 +56,8 @@ subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao) double precision, intent(out) :: A_ao(LDA_ao,ao_num) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:) + PROVIDE mo_l_coef mo_r_coef + ! ao_overlap x mo_r_coef allocate( tmp_1(ao_num,mo_num) ) call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0 & diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 8b801f9d..ea4bd36c 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -352,7 +352,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao BEGIN_DOC ! - ! tc_grad_square_ao(k,i,l,j) = 1/2 + ! tc_grad_square_ao(k,i,l,j) = -1/2 ! END_DOC @@ -373,6 +373,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao else + PROVIDE int2_grad1_u12_square_ao + allocate(b_mat(n_points_final_grid,ao_num,ao_num)) b_mat = 0.d0 @@ -392,8 +394,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao !$OMP END PARALLEL tc_grad_square_ao = 0.d0 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(b_mat) diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 064eb9f1..24e7e743 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -1,68 +1,68 @@ ! --- -BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12: - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) - ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! = -int2_grad1_u12_ao(i,j,ipoint,:) - ! - ! if J(r1,r2) = u12 x v1 x v2 - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = v2 x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ] - ! - \grad_2 v2 x [ \int dr1 u12 v1 \phi_i(r1) \phi_j(r1) ] - ! = -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! + 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - ! - END_DOC - - implicit none - integer :: ipoint, i, j - double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - PROVIDE j1b_type - - if(j1b_type .eq. 3) then - - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - - int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - - else - - int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao - - endif - -END_PROVIDER +!BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) +! ! +! ! where r1 = r(ipoint) +! ! +! ! if J(r1,r2) = u12: +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) +! ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] +! ! = -int2_grad1_u12_ao(i,j,ipoint,:) +! ! +! ! if J(r1,r2) = u12 x v1 x v2 +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = v2 x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ] +! ! - \grad_2 v2 x [ \int dr1 u12 v1 \phi_i(r1) \phi_j(r1) ] +! ! = -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) +! ! + 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) +! ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) +! ! +! ! +! END_DOC +! +! implicit none +! integer :: ipoint, i, j +! double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 +! +! PROVIDE j1b_type +! +! if(j1b_type .eq. 3) then +! +! do ipoint = 1, n_points_final_grid +! x = final_grid_points(1,ipoint) +! y = final_grid_points(2,ipoint) +! z = final_grid_points(3,ipoint) +! +! tmp0 = 0.5d0 * v_1b(ipoint) +! tmp_x = v_1b_grad(1,ipoint) +! tmp_y = v_1b_grad(2,ipoint) +! tmp_z = v_1b_grad(3,ipoint) +! +! do j = 1, ao_num +! do i = 1, ao_num +! +! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) +! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) +! +! int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x +! int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y +! int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z +! enddo +! enddo +! enddo +! +! else +! +! int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao +! +! endif +! +!END_PROVIDER ! --- @@ -192,7 +192,10 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ! ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! -1 in \int dr2 ! ! This is obtained by integration by parts. ! @@ -207,8 +210,6 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, print*, ' providing tc_grad_and_lapl_ao ...' call wall_time(time0) - PROVIDE int2_grad1_u12_ao - if(read_tc_integ) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="read") @@ -217,6 +218,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, else + PROVIDE int2_grad1_u12_ao + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) b_mat = 0.d0 @@ -247,10 +250,10 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 0.d0, tc_grad_and_lapl_ao, ao_num*ao_num) + , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) enddo deallocate(b_mat) - + call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num) ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 865522e0..41209a36 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -1,8 +1,7 @@ ! --- - BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] -&BEGIN_PROVIDER [double precision, int2_grad1_u12_squared_ao, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! @@ -23,10 +22,6 @@ ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) ! - ! - ! - ! int2_grad1_u12_squared_ao = int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 - ! END_DOC implicit none @@ -34,25 +29,22 @@ double precision :: time0, time1 double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - print*, ' providing int2_grad1_u12_ao & int2_grad1_u12_squared_ao ...' + print*, ' providing int2_grad1_u12_ao ...' + call wall_time(time0) PROVIDE j1b_type if(read_tc_integ) then - call wall_time(time0) open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") read(11) int2_grad1_u12_ao - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao =', time1-time0 endif if(j1b_type .eq. 3) then - ! --- - if(.not.read_tc_integ) then - call wall_time(time0) - PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + + PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + int2_grad1_u12_ao = 0.d0 ! TODO OPENMP do ipoint = 1, n_points_final_grid @@ -73,42 +65,14 @@ enddo enddo enddo - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + endif - ! --- - - call wall_time(time0) - PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - int2_grad1_u12_squared_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_squared_ao =', time1-time0 - - ! --- - elseif(j1b_type .ge. 100) then - ! --- - - call wall_time(time0) PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_squared_num PROVIDE grad1_u12_num + double precision, allocatable :: tmp(:,:,:) allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) tmp = 0.d0 @@ -130,34 +94,14 @@ if(.not.read_tc_integ) then int2_grad1_u12_ao = 0.d0 do m = 1, 3 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) enddo - !! DEBUG - !PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b - !int2_grad1_u12_ao = 0.d0 - !do ipoint = 1, n_points_final_grid - ! x = final_grid_points(1,ipoint) - ! y = final_grid_points(2,ipoint) - ! z = final_grid_points(3,ipoint) - ! tmp0 = 0.5d0 * v_1b(ipoint) - ! tmp_x = v_1b_grad(1,ipoint) - ! tmp_y = v_1b_grad(2,ipoint) - ! tmp_z = v_1b_grad(3,ipoint) - ! do j = 1, ao_num - ! do i = 1, ao_num - ! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - ! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - ! int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - ! int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - ! int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - ! enddo - ! enddo - !enddo - - ! these dgemm are equivalen to + !! these dgemm are equivalen to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & @@ -169,7 +113,9 @@ ! do j = 1, ao_num ! do i = 1, ao_num ! do jpoint = 1, n_points_extra_final_grid - ! w = tmp(jpoint,i,j) + ! w = -tmp(jpoint,i,j) + ! !w = tmp(jpoint,i,j) this work also because of the symmetry in K(1,2) + ! ! and sign compensation in L(1,2,3) ! int2_grad1_u12_ao(i,j,ipoint,1) += w * grad1_u12_num(jpoint,ipoint,1) ! int2_grad1_u12_ao(i,j,ipoint,2) += w * grad1_u12_num(jpoint,ipoint,2) ! int2_grad1_u12_ao(i,j,ipoint,3) += w * grad1_u12_num(jpoint,ipoint,3) @@ -179,67 +125,15 @@ !enddo !!$OMP END DO !!$OMP END PARALLEL - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao =', time1-time0 endif - ! --- - - call wall_time(time0) - int2_grad1_u12_squared_ao = 0.d0 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) - - !! DEBUG - !PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - !int2_grad1_u12_squared_ao = 0.d0 - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, ipoint) & - !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL - !call wall_time(time1) - - !! this dgemm is equivalen to - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & - !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, & - !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! do jpoint = 1, n_points_extra_final_grid - ! w = 0.5d0 * tmp(jpoint,i,j) - ! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_squared_ao =', time1-time0 - - ! --- - deallocate(tmp) else + print *, ' j1b_type = ', j1b_type, 'not implemented yet' stop + endif if(write_tc_integ.and.mpi_master) then @@ -250,6 +144,112 @@ call ezfio_set_tc_keywords_io_tc_integ('Read') endif + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_square_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .eq. 3) then + + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif(j1b_type .ge. 100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_squared_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + int2_grad1_u12_square_ao = 0.d0 + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num) + + !! this dgemm is equivalen to + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & + !!$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, & + !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & + !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do jpoint = 1, n_points_extra_final_grid + ! w = -0.5d0 * tmp(jpoint,i,j) + ! int2_grad1_u12_square_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 4f8dc74d..a60c99da 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -48,9 +48,14 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao print *, ' providing ao_tc_int_chemist ...' call wall_time(wall0) - if(test_cycle_tc)then + if(test_cycle_tc) then + ao_tc_int_chemist = ao_tc_int_chemist_test + else + + PROVIDE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -68,27 +73,34 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER -BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] + implicit none integer :: i, j, k, l double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' call wall_time(wall0) - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) -! ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) - enddo + + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + !ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo + enddo + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] implicit none diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f index 0b08f784..4ec70de5 100644 --- a/src/tc_scf/diis_tcscf.irp.f +++ b/src/tc_scf/diis_tcscf.irp.f @@ -92,17 +92,22 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] allocate(F(ao_num,ao_num)) if(var_tc) then + do i = 1, ao_num do j = 1, ao_num F(j,i) = Fock_matrix_vartc_ao_tot(j,i) enddo enddo + else + + PROVIDE Fock_matrix_tc_ao_tot do i = 1, ao_num do j = 1, ao_num F(j,i) = Fock_matrix_tc_ao_tot(j,i) enddo enddo + endif allocate(tmp(ao_num,ao_num)) @@ -139,6 +144,9 @@ BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] implicit none + PROVIDE mo_r_coef mo_l_coef + PROVIDE FQS_SQF_ao + call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index d8b962d7..a67a3705 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -50,19 +50,23 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] BEGIN_DOC -! ALPHA part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! + ! ALPHA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! END_DOC + implicit none integer :: a, b, i, j, o double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia double precision :: ti, tf PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs !print *, ' PROVIDING fock_3e_uhf_mo_a ...' - call wall_time(ti) + !call wall_time(ti) o = elec_beta_num + 1 @@ -142,7 +146,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] enddo enddo - call wall_time(tf) + !call wall_time(tf) !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti END_PROVIDER diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 1d651c4e..52eeb694 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -18,6 +18,8 @@ double precision :: density, density_a, density_b double precision :: t0, t1 + PROVIDE ao_two_e_tc_tot + !print*, ' providing two_e_tc_non_hermit_integral_seq ...' !call wall_time(t0) @@ -80,6 +82,10 @@ END_PROVIDER double precision :: t0, t1 double precision, allocatable :: tmp_a(:,:), tmp_b(:,:) + PROVIDE ao_two_e_tc_tot + PROVIDE mo_l_coef mo_r_coef + PROVIDE TCSCF_density_matrix_ao_alpha TCSCF_density_matrix_ao_beta + !print*, ' providing two_e_tc_non_hermit_integral ...' !call wall_time(t0) @@ -142,7 +148,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)] implicit none - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha END_PROVIDER @@ -181,14 +187,17 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1)) !deallocate(tmp) + PROVIDE mo_l_coef mo_r_coef call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) if(three_body_h_tc) then !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth + PROVIDE fock_3e_uhf_mo_a Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a endif else + call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) @@ -276,6 +285,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] implicit none + PROVIDE mo_l_coef mo_r_coef + PROVIDE Fock_matrix_tc_mo_tot + call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f index a03a0624..78f4c9b0 100644 --- a/src/tc_scf/fock_tc_mo_tot.irp.f +++ b/src/tc_scf/fock_tc_mo_tot.irp.f @@ -1,107 +1,120 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ] &BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)] - implicit none - BEGIN_DOC - ! TC-Fock matrix on the MO basis. WARNING !!! NON HERMITIAN !!! - ! For open shells, the ROHF Fock Matrix is :: - ! - ! | F-K | F + K/2 | F | - ! |---------------------------------| - ! | F + K/2 | F | F - K/2 | - ! |---------------------------------| - ! | F | F - K/2 | F + K | - ! - ! - ! F = 1/2 (Fa + Fb) - ! - ! K = Fb - Fa - ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha - else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + BEGIN_DOC + ! TC-Fock matrix on the MO basis. WARNING !!! NON HERMITIAN !!! + ! For open shells, the ROHF Fock Matrix is :: + ! + ! | F-K | F + K/2 | F | + ! |---------------------------------| + ! | F + K/2 | F | F - K/2 | + ! |---------------------------------| + ! | F | F - K/2 | F + K | + ! + ! + ! F = 1/2 (Fa + Fb) + ! + ! K = Fb - Fa + ! + END_DOC + + implicit none + integer :: i, j, n + + if(elec_alpha_num == elec_beta_num) then + + PROVIDE Fock_matrix_tc_mo_alpha + + Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha + + else + + PROVIDE Fock_matrix_tc_mo_beta Fock_matrix_tc_mo_alpha + + do j = 1, elec_beta_num + ! F-K + do i = 1, elec_beta_num !CC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + enddo + ! F+K/2 + do i = elec_beta_num+1, elec_alpha_num !CA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - enddo + enddo + ! F + do i = elec_alpha_num+1, mo_num !CV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + do j = elec_beta_num+1, elec_alpha_num + ! F+K/2 + do i = 1, elec_beta_num !AC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + enddo + ! F + do i = elec_beta_num+1, elec_alpha_num !AA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + ! F-K/2 + do i = elec_alpha_num+1, mo_num !AV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - enddo + enddo + enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + do j = elec_alpha_num+1, mo_num + ! F + do i = 1, elec_beta_num !VC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + ! F-K/2 + do i = elec_beta_num+1, elec_alpha_num !VA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) & + enddo + ! F+K + do i = elec_alpha_num+1, mo_num !VV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) & + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - enddo - if(three_body_h_tc)then + enddo + enddo + + if(three_body_h_tc) then + + PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth + ! C-O do j = 1, elec_beta_num - do i = elec_beta_num+1, elec_alpha_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_beta_num+1, elec_alpha_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo ! C-V do j = 1, elec_beta_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_alpha_num+1, mo_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo ! O-V do j = elec_beta_num+1, elec_alpha_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_alpha_num+1, mo_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo - endif + endif - endif + endif - do i = 1, mo_num - Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i) - enddo + do i = 1, mo_num + Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i) + enddo if(frozen_orb_scf)then @@ -116,28 +129,29 @@ enddo endif - if(no_oa_or_av_opt)then - do i = 1, n_act_orb - iorb = list_act(i) - do j = 1, n_inact_orb - jorb = list_inact(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_virt_orb - jorb = list_virt(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_core_orb - jorb = list_core(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif + if(no_oa_or_av_opt)then + do i = 1, n_act_orb + iorb = list_act(i) + do j = 1, n_inact_orb + jorb = list_inact(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + do j = 1, n_virt_orb + jorb = list_virt(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + do j = 1, n_core_orb + jorb = list_core(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + enddo + endif + if(.not.bi_ortho .and. three_body_h_tc)then - Fock_matrix_tc_mo_tot += fock_3_mat + Fock_matrix_tc_mo_tot += fock_3_mat endif END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index 7c776ce5..cca4b5aa 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -4,14 +4,16 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] BEGIN_DOC -! Alpha part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! Alpha part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used END_DOC + implicit none integer :: i, a PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_cs_3e_bi_orth fock_a_tmp1_bi_ortho fock_a_tmp2_bi_ortho fock_a_tot_3e_bi_orth = 0.d0 diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index c3de0322..1fb09828 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -11,6 +11,7 @@ integer :: i, j PROVIDE mo_l_coef mo_r_coef + PROVIDE two_e_tc_non_hermit_integral_alpha two_e_tc_non_hermit_integral_beta TC_HF_energy = nuclear_repulsion TC_HF_one_e_energy = 0.d0 From b2e65d010be82ec5cab814013e50565e7fa7df47 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 27 Apr 2023 16:52:31 +0200 Subject: [PATCH 060/337] added OPENMP for 3e terms --- src/non_h_ints_mu/tc_integ.irp.f | 23 ++- src/tc_scf/diis_tcscf.irp.f | 14 ++ src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 8 +- src/tc_scf/fock_tc.irp.f | 66 ++++++-- src/tc_scf/fock_tc_mo_tot.irp.f | 9 +- src/tc_scf/fock_three_bi_ortho.irp.f | 233 ++++++++++++++++++++++---- src/tc_scf/tc_scf_energy.irp.f | 9 +- 7 files changed, 300 insertions(+), 62 deletions(-) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 41209a36..f725d134 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -46,15 +46,20 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b int2_grad1_u12_ao = 0.d0 - ! TODO OPENMP + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & + !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) do j = 1, ao_num do i = 1, ao_num tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) @@ -65,6 +70,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL endif diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f index 4ec70de5..5d7d6b2e 100644 --- a/src/tc_scf/diis_tcscf.irp.f +++ b/src/tc_scf/diis_tcscf.irp.f @@ -87,9 +87,13 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] implicit none integer :: i, j + double precision :: t0, t1 double precision, allocatable :: tmp(:,:) double precision, allocatable :: F(:,:) + !print *, ' Providing FQS_SQF_ao ...' + !call wall_time(t0) + allocate(F(ao_num,ao_num)) if(var_tc) then @@ -136,6 +140,9 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] deallocate(tmp) deallocate(F) + !call wall_time(t1) + !print *, ' Wall time for FQS_SQF_ao =', t1-t0 + END_PROVIDER ! --- @@ -143,6 +150,10 @@ END_PROVIDER BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] implicit none + double precision :: t0, t1 + + !print*, ' Providing FQS_SQF_mo ...' + !call wall_time(t0) PROVIDE mo_r_coef mo_l_coef PROVIDE FQS_SQF_ao @@ -150,6 +161,9 @@ BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) + !call wall_time(t1) + !print*, ' Wall time for FQS_SQF_mo =', t1-t0 + END_PROVIDER ! --- diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index a67a3705..14d3e5f6 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -65,8 +65,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef PROVIDE fock_3e_uhf_mo_cs - !print *, ' PROVIDING fock_3e_uhf_mo_a ...' - !call wall_time(ti) + print *, ' Providing fock_3e_uhf_mo_a ...' + call wall_time(ti) o = elec_beta_num + 1 @@ -146,8 +146,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] enddo enddo - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti + call wall_time(tf) + print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti END_PROVIDER diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 52eeb694..207154ea 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -86,22 +86,22 @@ END_PROVIDER PROVIDE mo_l_coef mo_r_coef PROVIDE TCSCF_density_matrix_ao_alpha TCSCF_density_matrix_ao_beta - !print*, ' providing two_e_tc_non_hermit_integral ...' + !print*, ' Providing two_e_tc_non_hermit_integral ...' !call wall_time(t0) two_e_tc_non_hermit_integral_alpha = 0.d0 two_e_tc_non_hermit_integral_beta = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & - !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & + !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & + !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) tmp_a = 0.d0 tmp_b = 0.d0 - !$OMP DO + !$OMP DO do j = 1, ao_num do l = 1, ao_num density_a = TCSCF_density_matrix_ao_alpha(l,j) @@ -119,22 +119,22 @@ END_PROVIDER enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do i = 1, ao_num do j = 1, ao_num two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i) two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(tmp_a, tmp_b) - !$OMP END PARALLEL + !$OMP END PARALLEL !call wall_time(t1) - !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0 + !print*, ' Wall time for two_e_tc_non_hermit_integral = ', t1 - t0 END_PROVIDER @@ -147,8 +147,15 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)] END_DOC implicit none + double precision :: t0, t1 - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha + !print*, ' Providing Fock_matrix_tc_ao_alpha ...' + !call wall_time(t0) + + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha + + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_ao_alpha =', t1-t0 END_PROVIDER @@ -175,8 +182,12 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] END_DOC implicit none + double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) + !print*, ' Providing Fock_matrix_tc_mo_alpha ...' + !call wall_time(t0) + if(bi_ortho) then !allocate(tmp(ao_num,ao_num)) @@ -188,12 +199,21 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] !deallocate(tmp) PROVIDE mo_l_coef mo_r_coef + + !call wall_time(tt0) call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + !call wall_time(tt1) + !print*, ' 2-e term:', tt1-tt0 + if(three_body_h_tc) then - !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + !call wall_time(tt0) + PROVIDE fock_a_tot_3e_bi_orth + Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth + !PROVIDE fock_3e_uhf_mo_a + !Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + !call wall_time(tt1) + !print*, ' 3-e term:', tt1-tt0 endif else @@ -203,6 +223,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] endif + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_mo_alpha =', t1-t0 + END_PROVIDER ! --- @@ -229,8 +252,10 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth - Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b + PROVIDE fock_b_tot_3e_bi_orth + Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth + !PROVIDE fock_3e_uhf_mo_b + !Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif else @@ -284,6 +309,10 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] implicit none + double precision :: t0, t1 + + !print*, ' Providing Fock_matrix_tc_ao_tot ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef PROVIDE Fock_matrix_tc_mo_tot @@ -291,6 +320,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_ao_tot =', t1-t0 + END_PROVIDER ! --- diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f index 78f4c9b0..eb8973ff 100644 --- a/src/tc_scf/fock_tc_mo_tot.irp.f +++ b/src/tc_scf/fock_tc_mo_tot.irp.f @@ -20,7 +20,11 @@ END_DOC implicit none - integer :: i, j, n + integer :: i, j, n + double precision :: t0, t1 + + !print*, ' Providing Fock_matrix_tc_mo_tot ...' + !call wall_time(t0) if(elec_alpha_num == elec_beta_num) then @@ -154,5 +158,8 @@ Fock_matrix_tc_mo_tot += fock_3_mat endif + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_mo_tot =', t1-t0 + END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index cca4b5aa..a3b342d7 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -4,13 +4,21 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] BEGIN_DOC + ! ! Alpha part of the Fock matrix from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used + ! + ! This calculation becomes the dominant part one the integrals are provided + ! END_DOC implicit none - integer :: i, a + integer :: i, a + double precision :: t0, t1 + + !print*, ' Providing fock_a_tot_3e_bi_orth ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef PROVIDE fock_cs_3e_bi_orth fock_a_tmp1_bi_ortho fock_a_tmp2_bi_ortho @@ -25,6 +33,9 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] enddo enddo + !call wall_time(t1) + !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1-t0 + END_PROVIDER ! --- @@ -32,10 +43,15 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)] BEGIN_DOC -! Beta part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! + ! Beta part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + ! This calculation becomes the dominant part one the integrals are provided + ! END_DOC + implicit none integer :: i, a @@ -58,15 +74,30 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss, contrib_sos, contrib_soo, contrib - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - double precision :: new + integer :: i, a, j, k + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_cs_3e_bi_orth ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, contrib) + fock_cs_3e_bi_orth = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, c_3_int, c_minus_3_int, exch_13_int, exch_23_int, exch_12_int, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_cs_3e_bi_orth) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num @@ -87,16 +118,29 @@ BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 - new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int - - fock_cs_3e_bi_orth(a,i) += new + tmp(a,i) += 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do j = 1, mo_num + fock_cs_3e_bi_orth(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth + !call wall_time(t1) + !print*, ' Wall time for fock_cs_3e_bi_orth =', t1-t0 + END_PROVIDER ! --- @@ -104,20 +148,37 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss, contrib_sos, contrib_soo, contrib - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - double precision :: new + integer :: i, a, j, k, ee + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_a_tmp1_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, contrib) + + ee = elec_beta_num + 1 fock_a_tmp1_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, c_3_int, c_minus_3_int, exch_13_int, exch_23_int, exch_12_int, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, elec_beta_num, ee, fock_a_tmp1_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num - - do j = elec_beta_num + 1, elec_alpha_num + + do j = ee, elec_alpha_num do k = 1, elec_beta_num + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > @@ -125,14 +186,29 @@ BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 - fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) + tmp(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do j = 1, mo_num + fock_a_tmp1_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho + !call wall_time(t1) + !print*, ' Wall time for fock_a_tmp1_bi_ortho =', t1-t0 + END_PROVIDER ! --- @@ -140,24 +216,56 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss + integer :: i, a, j, k, ee + double precision :: contrib_sss + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_a_tmp2_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call contrib_3e_sss(1, 1, 1, 1, contrib_sss) + + ee = elec_beta_num + 1 fock_a_tmp2_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, contrib_sss, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, ee, fock_a_tmp2_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num do j = 1, elec_alpha_num - do k = elec_beta_num+1, elec_alpha_num + do k = ee, elec_alpha_num call contrib_3e_sss(a, i, j, k, contrib_sss) - fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss + tmp(a,i) += 0.5d0 * contrib_sss enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do j = 1, mo_num + fock_a_tmp2_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(t1) + !print*, ' Wall time for fock_a_tmp2_bi_ortho =', t1-t0 END_PROVIDER @@ -166,30 +274,61 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int - double precision :: new + integer :: i, a, j, k, ee + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_b_tmp1_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, direct_int) + + ee = elec_beta_num + 1 fock_b_tmp1_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, exch_13_int, exch_23_int, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, ee, fock_b_tmp1_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num do j = 1, elec_beta_num - do k = elec_beta_num+1, elec_alpha_num + do k = ee, elec_alpha_num call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 - fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int + tmp(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do j = 1, mo_num + fock_b_tmp1_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho + !call wall_time(t1) + !print*, ' Wall time for fock_b_tmp1_bi_ortho =', t1-t0 + END_PROVIDER ! --- @@ -197,24 +336,56 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_soo + integer :: i, a, j, k, ee + double precision :: contrib_soo + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_b_tmp2_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call contrib_3e_soo(1, 1, 1, 1, contrib_soo) + + ee = elec_beta_num + 1 fock_b_tmp2_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, contrib_soo, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, ee, fock_b_tmp2_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num - do j = elec_beta_num + 1, elec_alpha_num + do j = ee, elec_alpha_num do k = 1, elec_alpha_num call contrib_3e_soo(a, i, j, k, contrib_soo) - fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo + tmp(a,i) += 0.5d0 * contrib_soo enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do j = 1, mo_num + fock_b_tmp2_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(t1) + !print*, ' Wall time for fock_b_tmp2_bi_ortho =', t1-t0 END_PROVIDER diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index 1fb09828..5c643f19 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -8,7 +8,11 @@ END_DOC implicit none - integer :: i, j + integer :: i, j + double precision :: t0, t1 + + !print*, ' Providing TC energy ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef PROVIDE two_e_tc_non_hermit_integral_alpha two_e_tc_non_hermit_integral_beta @@ -29,6 +33,9 @@ TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy TC_HF_energy += diag_three_elem_hf + !call wall_time(t1) + !print*, ' Wall time for TC energy=', t1-t0 + END_PROVIDER ! --- From ff66fe8d262a60f7232e91f231754914064710f1 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 27 Apr 2023 18:03:30 +0200 Subject: [PATCH 061/337] added OPENMP for all 3e PROVIDERS --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 207 +++++++++++++++++--------- src/tc_scf/fock_tc.irp.f | 16 +- 2 files changed, 145 insertions(+), 78 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 14d3e5f6..3e624941 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -4,17 +4,27 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - call wall_time(ti) + !call wall_time(ti) fock_3e_uhf_mo_cs = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -28,19 +38,31 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - 2.d0 * I_bij_aji & + - 2.d0 * I_bij_iaj & + - 2.d0 * I_bij_jia ) enddo enddo enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_cs(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti END_PROVIDER @@ -58,20 +80,30 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] END_DOC implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef PROVIDE fock_3e_uhf_mo_cs - print *, ' Providing fock_3e_uhf_mo_a ...' - call wall_time(ti) + !print *, ' Providing fock_3e_uhf_mo_a ...' + !call wall_time(ti) o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -87,12 +119,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - 2.d0 * I_bij_jia ) enddo enddo @@ -109,12 +141,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - 2.d0 * I_bij_iaj & + - I_bij_jia ) enddo enddo @@ -131,12 +163,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) enddo enddo @@ -145,35 +177,58 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) - print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti END_PROVIDER ! --- BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + BEGIN_DOC -! BETA part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! BETA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used END_DOC implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef !print *, ' PROVIDING fock_3e_uhf_mo_b ...' - call wall_time(ti) + !call wall_time(ti) o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -189,9 +244,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_iaj ) enddo enddo @@ -208,9 +263,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_jia ) enddo enddo @@ -227,8 +282,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + - I_bij_aji ) enddo enddo @@ -237,8 +292,20 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_b(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti END_PROVIDER @@ -271,15 +338,15 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] fock_3e_uhf_ao_a = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) allocate(f_tmp(ao_num,ao_num)) f_tmp = 0.d0 - !$OMP DO + !$OMP DO do g = 1, ao_num do e = 1, ao_num dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) @@ -311,18 +378,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do mu = 1, ao_num do nu = 1, ao_num fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(f_tmp) - !$OMP END PARALLEL + !$OMP END PARALLEL call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti @@ -357,15 +424,15 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] fock_3e_uhf_ao_b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) allocate(f_tmp(ao_num,ao_num)) f_tmp = 0.d0 - !$OMP DO + !$OMP DO do g = 1, ao_num do e = 1, ao_num dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) @@ -397,18 +464,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do mu = 1, ao_num do nu = 1, ao_num fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(f_tmp) - !$OMP END PARALLEL + !$OMP END PARALLEL call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 207154ea..0ae515bb 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -208,10 +208,10 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] if(three_body_h_tc) then !call wall_time(tt0) - PROVIDE fock_a_tot_3e_bi_orth - Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth - !PROVIDE fock_3e_uhf_mo_a - !Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + !PROVIDE fock_a_tot_3e_bi_orth + !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth + PROVIDE fock_3e_uhf_mo_a + Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a !call wall_time(tt1) !print*, ' 3-e term:', tt1-tt0 endif @@ -252,10 +252,10 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - PROVIDE fock_b_tot_3e_bi_orth - Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth - !PROVIDE fock_3e_uhf_mo_b - !Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b + !PROVIDE fock_b_tot_3e_bi_orth + !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth + PROVIDE fock_3e_uhf_mo_b + Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif else From 8b67dfbe761d7a371f60ac681e2a3fdd940f97cb Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 28 Apr 2023 10:21:58 +0200 Subject: [PATCH 062/337] fixed bug in tc-env --- src/non_h_ints_mu/jast_deriv.irp.f | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 6c3f4214..31856a3d 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -272,9 +272,9 @@ subroutine grad1_j1b_nucl(r, grad) fact_z += e * z enddo - grad(1) = -2.d0 * fact_x - grad(2) = -2.d0 * fact_y - grad(3) = -2.d0 * fact_z + grad(1) = 2.d0 * fact_x + grad(2) = 2.d0 * fact_y + grad(3) = 2.d0 * fact_z else if(j1b_type .eq. 105) then @@ -294,9 +294,9 @@ subroutine grad1_j1b_nucl(r, grad) fact_z += e * z enddo - grad(1) = -4.d0 * fact_x - grad(2) = -4.d0 * fact_y - grad(3) = -4.d0 * fact_z + grad(1) = 4.d0 * fact_x + grad(2) = 4.d0 * fact_y + grad(3) = 4.d0 * fact_z else From c80ebe27b8d2d8592570aecb46e19fac6ca65064 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 28 Apr 2023 10:31:24 +0200 Subject: [PATCH 063/337] Introducing Cholesky-decomposed SCF --- src/ao_two_e_ints/cholesky.irp.f | 100 +++++++++ src/ao_two_e_ints/map_integrals.irp.f | 2 +- src/hartree_fock/fock_matrix_hf.irp.f | 311 ++++++++++++++++---------- src/utils/linear_algebra.irp.f | 2 +- 4 files changed, 298 insertions(+), 117 deletions(-) create mode 100644 src/ao_two_e_ints/cholesky.irp.f diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..d4c201aa --- /dev/null +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -0,0 +1,100 @@ +BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] + implicit none + BEGIN_DOC + ! Number of Cholesky vectors in AO basis + END_DOC + + integer :: i,j,k,l + double precision :: xnorm0, x, integral + double precision, external :: ao_two_e_integral + + cholesky_ao_num_guess = 0 + xnorm0 = 0.d0 + x = 0.d0 + do j=1,ao_num + do i=1,ao_num + integral = ao_two_e_integral(i,i,j,j) + if (integral > ao_integrals_threshold) then + cholesky_ao_num_guess += 1 + else + x += integral + endif + enddo + enddo + print *, 'Cholesky decomposition of AO integrals' + print *, '--------------------------------------' + print *, '' + print *, 'Estimated Error: ', x + print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' + +END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] + use mmap_module + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + END_DOC + + type(c_ptr) :: ptr + integer :: fd, i,j,k,l, rank + double precision, pointer :: ao_integrals(:,:,:,:) + double precision, external :: ao_two_e_integral + + ! Store AO integrals in a memory mapped file + call mmap(trim(ezfio_work_dir)//'ao_integrals', & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, .False., ptr) + call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) + + double precision :: integral + logical, external :: ao_two_e_integral_zero + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) + do l=1,ao_num + do j=1,l + do k=1,ao_num + do i=1,k + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! Call Lapack + cholesky_ao_num = cholesky_ao_num_guess + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + + ! Remove mmap + double precision, external :: getUnitAndOpen + call munmap( & + (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & + 8, fd, ptr) + open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') + close(99, status='delete') + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] + implicit none + BEGIN_DOC +! Transposed of the Cholesky vectors in AO basis set + END_DOC + integer :: i,j,k + do j=1,ao_num + do i=1,ao_num + do k=1,ao_num + cholesky_ao_transp(k,i,j) = cholesky_ao(i,j,k) + enddo + enddo + enddo +END_PROVIDER + diff --git a/src/ao_two_e_ints/map_integrals.irp.f b/src/ao_two_e_ints/map_integrals.irp.f index fa7c29cc..7d6a7da4 100644 --- a/src/ao_two_e_ints/map_integrals.irp.f +++ b/src/ao_two_e_ints/map_integrals.irp.f @@ -486,7 +486,7 @@ subroutine get_ao_two_e_integrals(j,k,l,sze,out_val) PROVIDE ao_two_e_integrals_in_map ao_integrals_map if (ao_one_e_integral_zero(j,l)) then - out_val = 0.d0 + out_val(1:sze) = 0.d0 return endif diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index d7d8fa7d..12641516 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -15,115 +15,59 @@ double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:) double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:) - ao_two_e_integral_alpha = 0.d0 - ao_two_e_integral_beta = 0.d0 - if (do_direct_integrals) then + if (.True.) then ! Use Cholesky-decomposed integrals + ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:) + ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:) - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0, & - !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2, & - !$OMP local_threshold)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz, & - !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) + else ! Use integrals in AO basis set - allocate(keys(1), values(1)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 + ao_two_e_integral_alpha = 0.d0 + ao_two_e_integral_beta = 0.d0 + if (do_direct_integrals) then - q = ao_num*ao_num*ao_num*ao_num - !$OMP DO SCHEDULE(static,64) - do p=1_8,q - call two_e_integrals_index_reverse(kk,ii,ll,jj,p) - if ( (kk(1)>ao_num).or. & - (ii(1)>ao_num).or. & - (jj(1)>ao_num).or. & - (ll(1)>ao_num) ) then - cycle - endif - k = kk(1) - i = ii(1) - l = ll(1) - j = jj(1) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,keys,values,p,q,r,s,i0,j0,k0,l0,& + !$OMP ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp, c0, c1, c2,& + !$OMP local_threshold) & + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map,ao_integrals_threshold, ao_two_e_integral_schwartz,& + !$OMP ao_two_e_integral_alpha, ao_two_e_integral_beta) - logical, external :: ao_two_e_integral_zero - if (ao_two_e_integral_zero(i,k,j,l)) then - cycle - endif - local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) - if (local_threshold < ao_integrals_threshold) then - cycle - endif - i0 = i - j0 = j - k0 = k - l0 = l - values(1) = 0.d0 - local_threshold = ao_integrals_threshold/local_threshold - do k2=1,8 - if (kk(k2)==0) then - cycle - endif - i = ii(k2) - j = jj(k2) - k = kk(k2) - l = ll(k2) - c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) - c1 = SCF_density_matrix_ao_alpha(k,i) - c2 = SCF_density_matrix_ao_beta(k,i) - if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then - cycle - endif - if (values(1) == 0.d0) then - values(1) = ao_two_e_integral(k0,l0,i0,j0) - endif - integral = c0 * values(1) - ao_two_e_integral_alpha_tmp(i,j) += integral - ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(1) - ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral - ao_two_e_integral_beta_tmp (l,j) -= c2 * integral - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL - else - PROVIDE ao_two_e_integrals_in_map + allocate(keys(1), values(1)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 - integer(omp_lock_kind) :: lck(ao_num) - integer(map_size_kind) :: i8 - integer :: ii(8), jj(8), kk(8), ll(8), k2 - integer(cache_map_size_kind) :: n_elements_max, n_elements - integer(key_kind), allocatable :: keys(:) - double precision, allocatable :: values(:) - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, & - !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& - !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& - !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) - - call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) - allocate(keys(n_elements_max), values(n_elements_max)) - allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & - ao_two_e_integral_beta_tmp(ao_num,ao_num)) - ao_two_e_integral_alpha_tmp = 0.d0 - ao_two_e_integral_beta_tmp = 0.d0 - - !$OMP DO SCHEDULE(static,1) - do i8=0_8,ao_integrals_map%map_size - n_elements = n_elements_max - call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) - do k1=1,n_elements - call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + q = ao_num*ao_num*ao_num*ao_num + !$OMP DO SCHEDULE(static,64) + do p=1_8,q + call two_e_integrals_index_reverse(kk,ii,ll,jj,p) + if ( (kk(1)>ao_num).or. & + (ii(1)>ao_num).or. & + (jj(1)>ao_num).or. & + (ll(1)>ao_num) ) then + cycle + endif + k = kk(1) + i = ii(1) + l = ll(1) + j = jj(1) + logical, external :: ao_two_e_integral_zero + if (ao_two_e_integral_zero(i,k,j,l)) then + cycle + endif + local_threshold = ao_two_e_integral_schwartz(k,l)*ao_two_e_integral_schwartz(i,j) + if (local_threshold < ao_integrals_threshold) then + cycle + endif + i0 = i + j0 = j + k0 = k + l0 = l + values(1) = 0.d0 + local_threshold = ao_integrals_threshold/local_threshold do k2=1,8 if (kk(k2)==0) then cycle @@ -132,25 +76,162 @@ j = jj(k2) k = kk(k2) l = ll(k2) - integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + c0 = SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l) + c1 = SCF_density_matrix_ao_alpha(k,i) + c2 = SCF_density_matrix_ao_beta(k,i) + if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then + cycle + endif + if (values(1) == 0.d0) then + values(1) = ao_two_e_integral(k0,l0,i0,j0) + endif + integral = c0 * values(1) ao_two_e_integral_alpha_tmp(i,j) += integral ao_two_e_integral_beta_tmp (i,j) += integral - integral = values(k1) - ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral - ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + integral = values(1) + ao_two_e_integral_alpha_tmp(l,j) -= c1 * integral + ao_two_e_integral_beta_tmp (l,j) -= c2 * integral enddo enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp - ao_two_e_integral_beta += ao_two_e_integral_beta_tmp - !$OMP END CRITICAL - deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) - !$OMP END PARALLEL + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + else + PROVIDE ao_two_e_integrals_in_map + integer(omp_lock_kind) :: lck(ao_num) + integer(map_size_kind) :: i8 + integer :: ii(8), jj(8), kk(8), ll(8), k2 + integer(cache_map_size_kind) :: n_elements_max, n_elements + integer(key_kind), allocatable :: keys(:) + double precision, allocatable :: values(:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max,& + !$OMP n_elements,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp)& + !$OMP SHARED(ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,& + !$OMP ao_integrals_map, ao_two_e_integral_alpha, ao_two_e_integral_beta) + + call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max) + allocate(keys(n_elements_max), values(n_elements_max)) + allocate(ao_two_e_integral_alpha_tmp(ao_num,ao_num), & + ao_two_e_integral_beta_tmp(ao_num,ao_num)) + ao_two_e_integral_alpha_tmp = 0.d0 + ao_two_e_integral_beta_tmp = 0.d0 + + !$OMP DO SCHEDULE(static,1) + do i8=0_8,ao_integrals_map%map_size + n_elements = n_elements_max + call get_cache_map(ao_integrals_map,i8,keys,values,n_elements) + do k1=1,n_elements + call two_e_integrals_index_reverse(kk,ii,ll,jj,keys(k1)) + + do k2=1,8 + if (kk(k2)==0) then + cycle + endif + i = ii(k2) + j = jj(k2) + k = kk(k2) + l = ll(k2) + integral = (SCF_density_matrix_ao_alpha(k,l)+SCF_density_matrix_ao_beta(k,l)) * values(k1) + ao_two_e_integral_alpha_tmp(i,j) += integral + ao_two_e_integral_beta_tmp (i,j) += integral + integral = values(k1) + ao_two_e_integral_alpha_tmp(l,j) -= SCF_density_matrix_ao_alpha(k,i) * integral + ao_two_e_integral_beta_tmp (l,j) -= SCF_density_matrix_ao_beta (k,i) * integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + ao_two_e_integral_alpha += ao_two_e_integral_alpha_tmp + ao_two_e_integral_beta += ao_two_e_integral_beta_tmp + !$OMP END CRITICAL + deallocate(keys,values,ao_two_e_integral_alpha_tmp,ao_two_e_integral_beta_tmp) + !$OMP END PARALLEL + + endif endif +END_PROVIDER + + BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha_chol, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta_chol , (ao_num, ao_num) ] + use map_module + implicit none + BEGIN_DOC + ! Alpha and Beta Fock matrices in AO basis set + END_DOC + + integer :: m,n,l,s,j + double precision :: integral + double precision, allocatable :: X(:), X2(:,:,:,:), X3(:,:,:,:) + + allocate (X(cholesky_ao_num)) + + + ! X(j) = \sum_{mn} SCF_density_matrix_ao(m,n) * cholesky_ao(m,n,j) + call dgemm('T','N',cholesky_ao_num,1,ao_num*ao_num,1.d0, & + cholesky_ao, ao_num*ao_num, & + SCF_density_matrix_ao, ao_num*ao_num,0.d0, & + X, cholesky_ao_num) +! + + ! ao_two_e_integral_alpha(m,n) = \sum_{j} cholesky_ao(m,n,j) * X(j) + call dgemm('N','N',ao_num*ao_num,1,cholesky_ao_num, 1.d0, & + cholesky_ao, ao_num*ao_num, & + X, cholesky_ao_num, 0.d0, & + ao_two_e_integral_alpha_chol, ao_num*ao_num) + + deallocate(X) + + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + + + allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + +! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,1), ao_num) + + call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao, ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + + do s=1,ao_num + do j=1,cholesky_ao_num + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + deallocate(X2) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & + cholesky_ao, ao_num, & + X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + + deallocate(X3) + END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ] diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index c02560e3..3b43d607 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1854,7 +1854,7 @@ do k = 1, N end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place -U = 0.00D+0 +U(:,:) = 0.00D+0 do k = 1, N l = piv(k) U(l, :) = A(1:rank, k) From e25436de8d400ee7e42f0d47757793bc1f93ef48 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 1 May 2023 09:15:58 +0200 Subject: [PATCH 064/337] minor modif --- src/tc_bi_ortho/print_tc_var.irp.f | 20 ++++++++++++++++++++ src/tc_bi_ortho/tc_utils.irp.f | 26 ++++++++++++++++++++++++++ src/tc_scf/rh_tcscf_diis.irp.f | 3 +-- src/tc_scf/routines_rotates.irp.f | 3 ++- src/tc_scf/tc_scf.irp.f | 1 - 5 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 src/tc_bi_ortho/print_tc_var.irp.f diff --git a/src/tc_bi_ortho/print_tc_var.irp.f b/src/tc_bi_ortho/print_tc_var.irp.f new file mode 100644 index 00000000..fa0a4363 --- /dev/null +++ b/src/tc_bi_ortho/print_tc_var.irp.f @@ -0,0 +1,20 @@ +program print_tc_var + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + print *, 'Hello world' + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_tc_var() + +end + diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 594b466c..f8f648e8 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -32,3 +32,29 @@ subroutine write_tc_energy() end +! --- + +subroutine write_tc_var() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: SIGMA_TC + + do k = 1, n_states + + SIGMA_TC = 0.d0 + do j = 2, N_det + call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + SIGMA_TC = SIGMA_TC + htot * htot + enddo + + print *, " state : ", k + print *, " SIGMA_TC = ", SIGMA_TC + + enddo + +end + +! --- + diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 645742c8..5901911c 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -231,8 +231,7 @@ subroutine rh_tcscf_diis() ! --- print *, ' TCSCF DIIS converged !' - call print_energy_and_mos() - + !call print_energy_and_mos() call write_time(6) deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 3c12118f..8c1071b2 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -46,6 +46,7 @@ end subroutine LTxR ! --- subroutine minimize_tc_orb_angles() + BEGIN_DOC ! routine that minimizes the angle between left- and right-orbitals when degeneracies are found END_DOC @@ -362,7 +363,7 @@ subroutine print_energy_and_mos() integer :: i print *, ' ' - print *, ' TC energy = ', TC_HF_energy + print *, ' TC energy = ', TC_HF_energy print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 85389f30..88ddd26c 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -54,7 +54,6 @@ program tc_scf endif call minimize_tc_orb_angles() - call print_energy_and_mos() endif From ff1314a94ee947198ff69df4ce5771fd9b85a872 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 1 May 2023 09:17:38 +0200 Subject: [PATCH 065/337] fixed OpenMP bug in 3e terms --- src/tc_scf/fock_three_bi_ortho.irp.f | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index a3b342d7..5d2f199c 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -127,7 +127,7 @@ BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] !$OMP CRITICAL do i = 1, mo_num - do j = 1, mo_num + do a = 1, mo_num fock_cs_3e_bi_orth(a,i) += tmp(a,i) enddo enddo @@ -195,7 +195,7 @@ BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] !$OMP CRITICAL do i = 1, mo_num - do j = 1, mo_num + do a = 1, mo_num fock_a_tmp1_bi_ortho(a,i) += tmp(a,i) enddo enddo @@ -255,7 +255,7 @@ BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)] !$OMP CRITICAL do i = 1, mo_num - do j = 1, mo_num + do a = 1, mo_num fock_a_tmp2_bi_ortho(a,i) += tmp(a,i) enddo enddo @@ -315,7 +315,7 @@ BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)] !$OMP CRITICAL do i = 1, mo_num - do j = 1, mo_num + do a = 1, mo_num fock_b_tmp1_bi_ortho(a,i) += tmp(a,i) enddo enddo @@ -375,7 +375,7 @@ BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)] !$OMP CRITICAL do i = 1, mo_num - do j = 1, mo_num + do a = 1, mo_num fock_b_tmp2_bi_ortho(a,i) += tmp(a,i) enddo enddo From 0405a71572034e2d244e7dc2b1bbe1dc468e22f0 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 1 May 2023 14:00:04 +0200 Subject: [PATCH 066/337] added pt2_tc and pt2_tc_cisd --- external/qp2-dependencies | 2 +- src/cipsi_tc_bi_ortho/pt2.irp.f | 31 ++++------------ src/fci_tc_bi/pt2_tc.irp.f | 31 ++++++++++++++++ src/kohn_sham/print_mos.irp.f | 12 ++---- src/tc_bi_ortho/e_corr_bi_ortho.irp.f | 7 +++- src/tc_bi_ortho/pt2_tc_cisd.irp.f | 43 ++++++++++++++++++++++ src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 4 +- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 14 +++---- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 2 +- src/tc_bi_ortho/tc_bi_ortho.irp.f | 25 +++++++------ 10 files changed, 115 insertions(+), 56 deletions(-) create mode 100644 src/fci_tc_bi/pt2_tc.irp.f create mode 100644 src/tc_bi_ortho/pt2_tc_cisd.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f index 13b4dff4..833cc0ea 100644 --- a/src/cipsi_tc_bi_ortho/pt2.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2.irp.f @@ -1,4 +1,4 @@ -subroutine pt2_tc_bi_ortho +subroutine tc_pt2 use selection_types implicit none BEGIN_DOC @@ -15,7 +15,7 @@ subroutine pt2_tc_bi_ortho double precision, external :: memory_of_double double precision :: correlation_energy_ratio,E_denom,E_tc,norm double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:) - PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map + PROVIDE H_apply_buffer_allocated distributed_davidson print*,'Diagonal elements of the Fock matrix ' do i = 1, mo_num @@ -44,24 +44,14 @@ subroutine pt2_tc_bi_ortho pt2_data % overlap= 0.d0 pt2_data % variance = huge(1.e0) - if (s2_eig) then - call make_s2_eigenfunction - endif + !!!! WARNING !!!! SEEMS TO BE PROBLEM WTH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION +! if (s2_eig) then +! call make_s2_eigenfunction +! endif print_pt2 = .False. call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) ! call routine_save_right - if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) - psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) - N_det = N_det_max - soft_touch N_det psi_det psi_coef - if (s2_eig) then - call make_s2_eigenfunction - endif - print_pt2 = .False. - call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) - endif allocate(ept2(1000),pt1(1000),extrap_energy(100)) @@ -71,18 +61,11 @@ subroutine pt2_tc_bi_ortho ! soft_touch thresh_it_dav print_pt2 = .True. - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) - to_select = max(N_states_diag, to_select) - - E_denom = E_tc ! TC Energy of the current wave function call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) - call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection - - N_iter += 1 - + call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) end diff --git a/src/fci_tc_bi/pt2_tc.irp.f b/src/fci_tc_bi/pt2_tc.irp.f new file mode 100644 index 00000000..96a54825 --- /dev/null +++ b/src/fci_tc_bi/pt2_tc.irp.f @@ -0,0 +1,31 @@ +program tc_pt2_prog + implicit none + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + pruning = -1.d0 + touch pruning +! pt2_relative_error = 0.01d0 +! touch pt2_relative_error + call run_pt2_tc + +end + + +subroutine run_pt2_tc + + implicit none + + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + if(elec_alpha_num+elec_beta_num.ge.3)then + if(three_body_h_tc)then + call provide_all_three_ints_bi_ortho + endif + endif + ! --- + + call tc_pt2 + + +end diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f index 5e728444..19bb98bc 100644 --- a/src/kohn_sham/print_mos.irp.f +++ b/src/kohn_sham/print_mos.irp.f @@ -3,7 +3,7 @@ program print_mos integer :: i,nx double precision :: r(3), xmax, dx, accu double precision, allocatable :: mos_array(:) - double precision:: alpha,envelop + double precision:: alpha,envelop,dm_a,dm_b allocate(mos_array(mo_num)) xmax = 5.d0 nx = 1000 @@ -11,20 +11,14 @@ program print_mos r = 0.d0 alpha = 0.5d0 do i = 1, nx + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) call give_all_mos_at_r(r,mos_array) accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 accu = dsqrt(accu) envelop = (1.d0 - dexp(-alpha * r(3)**2)) - write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, dm_a+dm_b, envelop r(3) += dx enddo end -double precision function f_mu(x) - implicit none - double precision, intent(in) :: x - - - -end diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index ec66a8b5..3a715b44 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -45,6 +45,9 @@ &BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj ] &BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth ] &BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth ] +&BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj_abs ] +&BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth_abs ] +&BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth_abs ] implicit none integer :: i,degree double precision :: hmono,htwoe,hthree,htilde_ij @@ -57,13 +60,15 @@ call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) + e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) else if(degree == 2)then e_corr_double_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) -! print*,'coef_wf , e_cor',reigvec_tc_bi_orth(i,1)/reigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) + e_corr_double_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) endif enddo e_corr_bi_orth_proj = e_corr_single_bi_orth + e_corr_double_bi_orth e_corr_bi_orth = eigval_right_tc_bi_orth(1) - e_tilde_bi_orth_00 + e_corr_bi_orth_proj_abs = e_corr_single_bi_orth_abs + e_corr_double_bi_orth_abs END_PROVIDER BEGIN_PROVIDER [ double precision, e_tc_left_right ] diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f new file mode 100644 index 00000000..ecf5bb42 --- /dev/null +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -0,0 +1,43 @@ +program pt2_tc_cisd + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call routine +end + +subroutine routine + implicit none + integer :: i,h1,p1,h2,p2,s1,s2 + double precision :: h0i,hi0,e00,ei,delta_e + double precision :: norm,e_corr,coef + norm = 0.d0 + e_corr = 0.d0 + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + do i = 2, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + delta_e = e00 - ei + coef = hi0/delta_e + norm += coef*coef + e_corr += dabs(coef* h0i) + enddo + print*,'e_corr = ',e_corr + print*,'norm = ',norm + +end diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 00cebf3a..5a3f9935 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index baca498c..1b0e43bb 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -41,15 +41,15 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, if(s1.ne.s2)then ! opposite spin two-body htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then - htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? + elseif(double_normal_ord.and.elec_num.gt.2)then + htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif else @@ -58,16 +58,16 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then - htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? - htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? + elseif(double_normal_ord.and.elec_num.gt.2)then + htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) + htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif endif endif diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7cff3c73..2f9d83bf 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc)then + if (three_body_h_tc.and.elec_num.gt.2)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index b69a2fe6..f69684c0 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -48,17 +48,20 @@ subroutine routine_diag() if(N_states .eq. 1) then - print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) + print*,'e_tc_left_right = ',e_tc_left_right + print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 + print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth + print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single + print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double + print*,'***' + print*,'e_corr_bi_orth = ',e_corr_bi_orth + print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj + print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs + print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth + print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs + print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs print*,'Left/right eigenvectors' do i = 1,N_det write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) From 989affabb877a4707e371c0454283e3758925f36 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 2 May 2023 10:47:56 +0200 Subject: [PATCH 067/337] added some testing PT2 TC programs --- src/tc_bi_ortho/pt2_tc_cisd.irp.f | 94 +++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index ecf5bb42..50d9dd45 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -16,28 +16,110 @@ program pt2_tc_cisd print*, ' nb of states = ', N_states print*, ' nb of det = ', N_det + call routine_diag() call routine end subroutine routine implicit none - integer :: i,h1,p1,h2,p2,s1,s2 + integer :: i,h1,p1,h2,p2,s1,s2,degree double precision :: h0i,hi0,e00,ei,delta_e - double precision :: norm,e_corr,coef + double precision :: norm,e_corr,coef,e_corr_pos,e_corr_neg,e_corr_abs + + integer :: exc(0:2,2,2) + double precision :: phase + double precision :: eh1,ep1,eh2,ep2 + norm = 0.d0 e_corr = 0.d0 + e_corr_abs = 0.d0 + e_corr_pos = 0.d0 + e_corr_neg = 0.d0 call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) do i = 2, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) - delta_e = e00 - ei + call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) + call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + eh1 = Fock_matrix_tc_diag_mo_tot(h1) + ep1 = Fock_matrix_tc_diag_mo_tot(p1) + delta_e = eh1 - ep1 + if (degree==2)then + eh2 = Fock_matrix_tc_diag_mo_tot(h2) + ep2 = Fock_matrix_tc_diag_mo_tot(p2) + delta_e += eh2 - ep2 + endif +! delta_e = e00 - ei coef = hi0/delta_e norm += coef*coef - e_corr += dabs(coef* h0i) + e_corr = coef* h0i + if(e_corr.lt.0.d0)then + e_corr_neg += e_corr + elseif(e_corr.gt.0.d0)then + e_corr_pos += e_corr + endif + e_corr_abs += dabs(e_corr) enddo - print*,'e_corr = ',e_corr - print*,'norm = ',norm + print*,'e_corr_abs = ',e_corr_abs + print*,'e_corr_pos = ',e_corr_pos + print*,'e_corr_neg = ',e_corr_neg + print*,'norm = ',dsqrt(norm) end + +subroutine routine_diag() + + implicit none + integer :: i, j, k + double precision :: dE + + ! provide eigval_right_tc_bi_orth + ! provide overlap_bi_ortho + ! provide htilde_matrix_elmt_bi_ortho + + if(N_states .eq. 1) then + + print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) + print*,'e_tc_left_right = ',e_tc_left_right + print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 + print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth + print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single + print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double + print*,'***' + print*,'e_corr_bi_orth = ',e_corr_bi_orth + print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj + print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs + print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth + print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs + print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs + print*,'Left/right eigenvectors' + do i = 1,N_det + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + enddo + + else + + print*,'eigval_right_tc_bi_orth : ' + do i = 1, N_states + print*, i, eigval_right_tc_bi_orth(i) + enddo + + print*,'' + print*,'******************************************************' + print*,'TC Excitation energies (au) (eV)' + do i = 2, N_states + dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) + print*, i, dE, dE/0.0367502d0 + enddo + print*,'' + + endif + +end + + + From 4b1d384fb92a14437d0eebc42bd2bde651024111 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 2 May 2023 19:01:25 +0200 Subject: [PATCH 068/337] added Slater-type envelope --- src/non_h_ints_mu/jast_deriv.irp.f | 41 +++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 31856a3d..3a06196c 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -157,7 +157,18 @@ double precision function j1b_nucl(r) integer :: i double precision :: a, d, e, x, y, z - if(j1b_type .eq. 103) then + if(j1b_type .eq. 102) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) + enddo + + elseif(j1b_type .eq. 103) then j1b_nucl = 1.d0 do i = 1, nucl_num @@ -215,7 +226,29 @@ subroutine grad1_j1b_nucl(r, grad) double precision :: fact_x, fact_y, fact_z double precision :: ax_der, ay_der, az_der, a_expo - if(j1b_type .eq. 103) then + if(j1b_type .eq. 102) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = dsqrt(x*x + y*y + z*z) + e = a * dexp(-a*d) / d + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif(j1b_type .eq. 103) then x = r(1) y = r(2) @@ -254,7 +287,7 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = fact_y grad(3) = fact_z - else if(j1b_type .eq. 104) then + elseif(j1b_type .eq. 104) then fact_x = 0.d0 fact_y = 0.d0 @@ -276,7 +309,7 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = 2.d0 * fact_y grad(3) = 2.d0 * fact_z - else if(j1b_type .eq. 105) then + elseif(j1b_type .eq. 105) then fact_x = 0.d0 fact_y = 0.d0 From f4aec2957d06f2d155adcd979f77b9566866d0a8 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 2 May 2023 21:47:29 +0200 Subject: [PATCH 069/337] norm modif in print_tc_wf --- src/tc_bi_ortho/print_tc_wf.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 58a733a7..0cf3ca87 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -26,7 +26,8 @@ subroutine write_l_r_wf integer :: i print*,'Writing the left-right wf' do i = 1, N_det - write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i) + write(i_unit_output,*)i, psi_l_coef_sorted_bi_ortho_left(i)/psi_l_coef_sorted_bi_ortho_left(1) & + , psi_r_coef_sorted_bi_ortho_right(i)/psi_r_coef_sorted_bi_ortho_right(1) enddo From 5eb3b69873f0b7bdacfcea72cbe8364a51c46bfb Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 4 May 2023 01:42:06 +0200 Subject: [PATCH 070/337] mu(r) added --- src/non_h_ints_mu/jast_deriv.irp.f | 103 +++++++++++++++++++++++++++-- src/non_h_ints_mu/tc_integ.irp.f | 62 ++++++++--------- src/tc_keywords/EZFIO.cfg | 6 ++ 3 files changed, 134 insertions(+), 37 deletions(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 3a06196c..0adc49fb 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -74,6 +74,42 @@ !$OMP END DO !$OMP END PARALLEL + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -91,16 +127,16 @@ double precision function j12_mu(r1, r2) implicit none double precision, intent(in) :: r1(3), r2(3) - double precision :: mu_r12, r12 + double precision :: mu_tmp, r12 if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_r12 = mu_erf * r12 + mu_tmp = mu_erf * r12 - j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf + j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf else @@ -116,6 +152,8 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) + include 'constants.include.F' + implicit none double precision, intent(in) :: r1(3), r2(3) double precision, intent(out) :: grad(3) @@ -129,7 +167,7 @@ subroutine grad1_j12_mu(r1, r2, grad) dy = r1(2) - r2(2) dz = r1(3) - r2(3) - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) if(r12 .lt. 1d-10) return tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 @@ -138,6 +176,28 @@ subroutine grad1_j12_mu(r1, r2, grad) grad(2) = tmp * dy grad(3) = tmp * dz + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + grad(1) = tmp * mu_der(1) + grad(2) = tmp * mu_der(2) + grad(3) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + grad(1) = grad(1) + tmp * dx + grad(2) = grad(2) + tmp * dy + grad(3) = grad(3) + tmp * dz + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -343,3 +403,38 @@ end subroutine grad1_j1b_nucl ! --- +subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: mu_val, mu_der(3) + + if(j1b_type .eq. 200) then + + double precision :: r(3), dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + double precision :: dm_tot, tmp + + PROVIDE mu_r_ct + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + dm_tot = dm_a(1) + dm_b(1) + mu_val = mu_r_ct * dsqrt(dm_tot) + tmp = 0.25d0 * mu_r_ct / dm_tot + mu_der(1) = tmp * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine mu_r_val_and_grad + +! --- diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index f725d134..784947b4 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -35,13 +35,13 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f PROVIDE j1b_type if(read_tc_integ) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") read(11) int2_grad1_u12_ao - endif - if(j1b_type .eq. 3) then + else - if(.not.read_tc_integ) then + if(j1b_type .eq. 3) then PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b @@ -73,32 +73,29 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL - endif + elseif(j1b_type .ge. 100) then - elseif(j1b_type .ge. 100) then + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_num - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - if(.not.read_tc_integ) then int2_grad1_u12_ao = 0.d0 do m = 1, 3 !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & @@ -108,7 +105,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) enddo - !! these dgemm are equivalen to + !! these dgemm are equivalent to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & @@ -132,15 +129,14 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !enddo !!$OMP END DO !!$OMP END PARALLEL + + deallocate(tmp) + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + endif - - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - endif if(write_tc_integ.and.mpi_master) then diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 85c8dac3..2e14488e 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -124,6 +124,12 @@ doc: type of 1-body Jastrow interface: ezfio, provider, ocaml default: 0 +[mu_r_ct] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 6.203504908994001e-1 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue From bfdfb546bd29d6d16d3cb2024f707f758c89a68f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 11:41:17 +0200 Subject: [PATCH 071/337] Fix pt2_max extra iterations --- src/cipsi/cipsi.irp.f | 8 +------- src/cipsi/stochastic_cipsi.irp.f | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index 88aaeae0..1279b5cd 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -128,13 +128,7 @@ subroutine run_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then if (do_pt2) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index b83e658a..deffaeb7 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -118,13 +118,7 @@ subroutine run_stochastic_cipsi if (qp_stop()) exit enddo - if (.not.qp_stop()) then - if (N_det < N_det_max) then - call diagonalize_CI - call save_wavefunction - call save_energy(psi_energy_with_nucl_rep, zeros) - endif - + if ((.not.qp_stop()).and.(N_det > N_det_max)) then call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) From 0330ac6ccc4eb24deaef70632aa9dcd1e8fabfaa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 4 May 2023 15:50:40 +0200 Subject: [PATCH 072/337] 4idx transformation with cholesky --- config/ifort_2021_xHost.cfg | 2 +- src/ao_two_e_ints/EZFIO.cfg | 5 ++ src/hartree_fock/fock_matrix_hf.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 16 ++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 74 +++++++++++++++++++++++-- 5 files changed, 91 insertions(+), 8 deletions(-) create mode 100644 src/mo_two_e_ints/cholesky.irp.f diff --git a/config/ifort_2021_xHost.cfg b/config/ifort_2021_xHost.cfg index 1161833b..9170b059 100644 --- a/config/ifort_2021_xHost.cfg +++ b/config/ifort_2021_xHost.cfg @@ -6,7 +6,7 @@ # --align=32 : Align all provided arrays on a 32-byte boundary # [COMMON] -FC : ifort -fpic +FC : ifort -fpic -diag-disable 5462 LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=64 -DINTEL diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index b18c65d1..caed4698 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -18,3 +18,8 @@ interface: ezfio,provider,ocaml default: False ezfio_name: direct +[do_ao_cholesky] +type: logical +doc: Perform Cholesky decomposition of AO integrals +interface: ezfio,provider,ocaml +default: True diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 12641516..8c6658c5 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -15,7 +15,7 @@ double precision, allocatable :: ao_two_e_integral_alpha_tmp(:,:) double precision, allocatable :: ao_two_e_integral_beta_tmp(:,:) - if (.True.) then ! Use Cholesky-decomposed integrals + if (do_ao_cholesky) then ! Use Cholesky-decomposed integrals ao_two_e_integral_alpha(:,:) = ao_two_e_integral_alpha_chol(:,:) ao_two_e_integral_beta (:,:) = ao_two_e_integral_beta_chol (:,:) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f new file mode 100644 index 00000000..14d3c696 --- /dev/null +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -0,0 +1,16 @@ +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: k + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index ae299e9f..b7ef901d 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -50,13 +50,16 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call cpu_time(cpu_1) if(no_vvvv_integrals)then -! call four_idx_novvvv call four_idx_novvvv_old else - if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then - call four_idx_dgemm + if (do_ao_cholesky) then + call add_integrals_to_map_cholesky else - call add_integrals_to_map(full_ijkl_bitmask_4) + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm + else + call add_integrals_to_map(full_ijkl_bitmask_4) + endif endif endif @@ -175,7 +178,7 @@ subroutine add_integrals_to_map(mask_ijkl) implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijkl(N_int,4) @@ -450,13 +453,72 @@ subroutine add_integrals_to_map(mask_ijkl) end +subroutine add_integrals_to_map_cholesky + use bitmasks + implicit none + + BEGIN_DOC + ! Adds integrals to the MO map using Cholesky vectors + END_DOC + + integer :: i,j,k,l,m + integer :: size_buffer, n_integrals + size_buffer = min(mo_num*mo_num*mo_num,16000000) + + double precision, allocatable :: Vtmp(:,:,:,:) + integer(key_kind) , allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_value(:) + + if (.True.) then + ! In-memory transformation + + allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + + call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & + cholesky_mo, mo_num*mo_num, & + cholesky_mo, mo_num*mo_num, 0.d0, & + Vtmp, mo_num*mo_num) + + !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + n_integrals = 0 + !$OMP DO + do l=1,mo_num + do k=1,l + do j=1,mo_num + do i=1,j + if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then + n_integrals += 1 + buffer_value(n_integrals) = Vtmp(i,j,k,l) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 + endif + endif + enddo + enddo + enddo + enddo + !$OMP END DO + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + deallocate(buffer_i, buffer_value) + !$OMP END PARALLEL + + deallocate(Vtmp) + call map_unique(mo_integrals_map) + + endif + +end subroutine add_integrals_to_map_three_indices(mask_ijk) use bitmasks implicit none BEGIN_DOC - ! Adds integrals to tha MO map according to some bitmask + ! Adds integrals to the MO map according to some bitmask END_DOC integer(bit_kind), intent(in) :: mask_ijk(N_int,3) From 868d5a162b6101bd897b5f659a06009d3115b17f Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 6 May 2023 18:25:06 +0200 Subject: [PATCH 073/337] jast 201 added --- src/mo_basis/EZFIO.cfg | 6 +++ src/mo_basis/mos_aux.irp.f | 53 ++++++++++++++++++++++ src/non_h_ints_mu/jast_deriv.irp.f | 65 +++++++++++++++++++++++---- src/tc_bi_ortho/print_tc_energy.irp.f | 4 ++ src/tc_keywords/EZFIO.cfg | 2 +- 5 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 src/mo_basis/mos_aux.irp.f diff --git a/src/mo_basis/EZFIO.cfg b/src/mo_basis/EZFIO.cfg index 81ffba5c..4c4f1eca 100644 --- a/src/mo_basis/EZFIO.cfg +++ b/src/mo_basis/EZFIO.cfg @@ -9,6 +9,12 @@ doc: Coefficient of the i-th |AO| on the j-th |MO| interface: ezfio size: (ao_basis.ao_num,mo_basis.mo_num) +[mo_coef_aux] +type: double precision +doc: AUX Coefficient of the i-th |AO| on the j-th |MO| +interface: ezfio +size: (ao_basis.ao_num,mo_basis.mo_num) + [mo_coef_imag] type: double precision doc: Imaginary part of the MO coefficient of the i-th |AO| on the j-th |MO| diff --git a/src/mo_basis/mos_aux.irp.f b/src/mo_basis/mos_aux.irp.f new file mode 100644 index 00000000..27a874b1 --- /dev/null +++ b/src/mo_basis/mos_aux.irp.f @@ -0,0 +1,53 @@ + +! --- + +BEGIN_PROVIDER [double precision, mo_coef_aux, (ao_num,mo_num)] + + implicit none + integer :: i, j + logical :: exists + double precision, allocatable :: buffer(:,:) + + PROVIDE ezfio_filename + + if (mpi_master) then + ! Coefs + call ezfio_has_mo_basis_mo_coef_aux(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read mo_coef_aux with MPI' + endif + IRP_ENDIF + + if (exists) then + if (mpi_master) then + call ezfio_get_mo_basis_mo_coef_aux(mo_coef_aux) + write(*,*) 'Read mo_coef_aux' + endif + IRP_IF MPI + call MPI_BCAST(mo_coef_aux, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read mo_coef_aux with MPI' + endif + IRP_ENDIF + else + ! Orthonormalized AO basis + do i = 1, mo_num + do j = 1, ao_num + mo_coef_aux(j,i) = ao_ortho_canonical_coef(j,i) + enddo + enddo + endif + +END_PROVIDER + diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 0adc49fb..d1a044cb 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -408,24 +408,71 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) implicit none double precision, intent(in) :: r1(3), r2(3) double precision, intent(out) :: mu_val, mu_der(3) + double precision :: r(3) + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + double precision :: dm_tot, tmp1, tmp2, tmp3 if(j1b_type .eq. 200) then - double precision :: r(3), dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) - double precision :: dm_tot, tmp + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf - PROVIDE mu_r_ct r(1) = 0.5d0 * (r1(1) + r2(1)) r(2) = 0.5d0 * (r1(2) + r2(2)) r(3) = 0.5d0 * (r1(3) + r2(3)) call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) - dm_tot = dm_a(1) + dm_b(1) - mu_val = mu_r_ct * dsqrt(dm_tot) - tmp = 0.25d0 * mu_r_ct / dm_tot - mu_der(1) = tmp * (grad_dm_a(1,1) + grad_dm_b(1,1)) - mu_der(2) = tmp * (grad_dm_a(2,1) + grad_dm_b(2,1)) - mu_der(3) = tmp * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + dm_tot = dm_a(1) + dm_b(1) + tmp1 = dsqrt(dm_tot) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * tmp1 + tmp2 + + mu_der = 0.d0 + if(dm_tot .lt. 1d-7) return + + tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 201) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha rho + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * dm_tot + tmp2 + + tmp3 = 0.5d0 * (mu_r_ct - tmp2) + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) else diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index e5f123a7..980d12de 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -9,6 +9,10 @@ program print_tc_energy my_n_pt_a_grid = 50 read_wf = .True. touch read_wf + + PROVIDE j1b_type + print*, 'j1b_type = ', j1b_type + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call write_tc_energy end diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 2e14488e..fad6f1c2 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -176,7 +176,7 @@ default: 1.e-7 type: logical doc: If |true|, the integrals of the three-body jastrow are computed with cycles interface: ezfio,provider,ocaml -default: True +default: Flase [thresh_biorthog_diag] type: Threshold From 402a6e89885d4501d3101a65d0afc99590e7ce47 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 7 May 2023 12:44:59 +0200 Subject: [PATCH 074/337] added jast_type 0 --- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 68 ++++++++++++++++++++ src/ao_tc_eff_map/compute_ints_eff_pot.irp.f | 14 ++-- src/bi_ort_ints/one_e_bi_ort.irp.f | 26 ++++---- src/non_h_ints_mu/jast_deriv.irp.f | 49 ++++++++++++-- src/non_h_ints_mu/tc_integ.irp.f | 51 ++++++++++++++- src/non_h_ints_mu/total_tc_int.irp.f | 16 +++++ src/tc_keywords/EZFIO.cfg | 2 +- 7 files changed, 196 insertions(+), 30 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 8196614f..722ff2ff 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -1,4 +1,72 @@ + +! --- + +BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) [1 - erf(mu r12)]^2 + ! + END_DOC + + implicit none + integer :: i, j, ipoint, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: tmp + double precision :: wall0, wall1 + + double precision, external :: overlap_gauss_r12_ao + + print*, ' providing int2_grad1u2_grad2u2 ...' + call wall_time(wall0) + + provide mu_erf final_grid_points j1b_pen + + int2_grad1u2_grad2u2 = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_fit, r, coef_fit, expo_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,int2_grad1u2_grad2u2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + + do i = 1, ao_num + do j = i, ao_num + + tmp = 0.d0 + do i_fit = 1, ng_fit_jast + + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) + + tmp += -0.25d0 * coef_fit * overlap_gauss_r12_ao(r, expo_fit, i, j) + enddo + + int2_grad1u2_grad2u2(j,i,ipoint) = tmp + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + int2_grad1u2_grad2u2(j,i,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for int2_grad1u2_grad2u2 =', wall1 - wall0 + +END_PROVIDER + ! --- BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] diff --git a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f index 7a567979..963a49a6 100644 --- a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f +++ b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f @@ -53,13 +53,13 @@ subroutine compute_ao_tc_sym_two_e_pot_jl(j, l, n_integrals, buffer_i, buffer_va integral_erf = ao_two_e_integral_erf(i, k, j, l) integral = integral_erf + integral_pot - if( j1b_type .eq. 1 ) then - !print *, ' j1b type 1 is added' - integral = integral + j1b_gauss_2e_j1(i, k, j, l) - elseif( j1b_type .eq. 2 ) then - !print *, ' j1b type 2 is added' - integral = integral + j1b_gauss_2e_j2(i, k, j, l) - endif + !if( j1b_type .eq. 1 ) then + ! !print *, ' j1b type 1 is added' + ! integral = integral + j1b_gauss_2e_j1(i, k, j, l) + !elseif( j1b_type .eq. 2 ) then + ! !print *, ' j1b type 2 is added' + ! integral = integral + j1b_gauss_2e_j2(i, k, j, l) + !endif if(abs(integral) < thr) then cycle diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index b0455570..5f2795f1 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -8,22 +8,22 @@ BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)] ao_one_e_integrals_tc_tot = ao_one_e_integrals - provide j1b_type + !provide j1b_type - if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then - - print *, ' do things properly !' - stop + !if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then + ! + ! print *, ' do things properly !' + ! stop - !do i = 1, ao_num - ! do j = 1, ao_num - ! ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & - ! + j1b_gauss_hermII (j,i) & - ! + j1b_gauss_nonherm(j,i) ) - ! enddo - !enddo + ! !do i = 1, ao_num + ! ! do j = 1, ao_num + ! ! ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & + ! ! + j1b_gauss_hermII (j,i) & + ! ! + j1b_gauss_nonherm(j,i) ) + ! ! enddo + ! !enddo - endif + !endif END_PROVIDER diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index d1a044cb..4cfd13d2 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -19,8 +19,12 @@ END_DOC implicit none - integer :: ipoint, jpoint - double precision :: r1(3), r2(3) + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: v1b_r1, v1b_r2, u2b_r12 + double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz + double precision, external :: j12_mu, j1b_nucl PROVIDE j1b_type PROVIDE final_grid_points_extra @@ -28,12 +32,43 @@ grad1_u12_num = 0.d0 grad1_u12_squared_num = 0.d0 - if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + if(j1b_type .eq. 100) then - double precision :: v1b_r1, v1b_r2, u2b_r12 - double precision :: grad1_v1b(3), grad1_u2b(3) - double precision :: dx, dy, dz - double precision, external :: j12_mu, j1b_nucl + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then !$OMP PARALLEL & !$OMP DEFAULT (NONE) & diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 784947b4..1a86a2e7 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -41,7 +41,34 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f else - if(j1b_type .eq. 3) then + if(j1b_type .eq. 0) then + + PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points & + !$OMP , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1)) + int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2)) + int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif(j1b_type .eq. 3) then PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b @@ -172,7 +199,27 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p PROVIDE j1b_type - if(j1b_type .eq. 3) then + if(j1b_type .eq. 0) then + + PROVIDE int2_grad1u2_grad2u2 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif(j1b_type .eq. 3) then PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index a60c99da..450bbef0 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -11,6 +11,13 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, call wall_time(wall0) if(test_cycle_tc) then + + PROVIDE j1b_type + if(j1b_type .ne. 3) then + print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type + stop + endif + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -20,7 +27,9 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, enddo enddo enddo + else + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -30,6 +39,7 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, enddo enddo enddo + endif call wall_time(wall1) @@ -50,6 +60,12 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao if(test_cycle_tc) then + PROVIDE j1b_type + if(j1b_type .ne. 3) then + print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type + stop + endif + ao_tc_int_chemist = ao_tc_int_chemist_test else diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index fad6f1c2..41c07d0b 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -176,7 +176,7 @@ default: 1.e-7 type: logical doc: If |true|, the integrals of the three-body jastrow are computed with cycles interface: ezfio,provider,ocaml -default: Flase +default: False [thresh_biorthog_diag] type: Threshold From f985af03953b2d25e6706cac2412eb38cf544075 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 7 May 2023 15:07:51 +0200 Subject: [PATCH 075/337] jast 4 added --- src/ao_many_one_e_ints/NEED | 1 + src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 62 ++-- .../grad_lapl_jmu_modif.irp.f | 62 ++-- src/ao_many_one_e_ints/listj1b.irp.f | 296 +++++++++++++----- src/non_h_ints_mu/grad_squared.irp.f | 2 +- src/non_h_ints_mu/j12_nucl_utils.irp.f | 173 +++++++--- src/non_h_ints_mu/tc_integ.irp.f | 4 +- 7 files changed, 404 insertions(+), 196 deletions(-) diff --git a/src/ao_many_one_e_ints/NEED b/src/ao_many_one_e_ints/NEED index 0d08442c..c57219cd 100644 --- a/src/ao_many_one_e_ints/NEED +++ b/src/ao_many_one_e_ints/NEED @@ -3,3 +3,4 @@ ao_two_e_ints becke_numerical_grid mo_one_e_ints dft_utils_in_r +tc_keywords diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 722ff2ff..fb4d71f3 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -25,11 +25,11 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_poin int2_grad1u2_grad2u2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_fit, r, coef_fit, expo_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,int2_grad1u2_grad2u2) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_fit, r, coef_fit, expo_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,int2_grad1u2_grad2u2) + !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -51,8 +51,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_poin enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -94,15 +94,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n int2_grad1u2_grad2u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) + !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -121,7 +121,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += -0.25d0 * coef_fit * int_fit -! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle +! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle ! --- @@ -146,8 +146,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -188,15 +188,15 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final int2_u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -215,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit -! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle +! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle ! --- @@ -240,8 +240,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index fc30cd83..25bb2f8b 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -24,12 +24,12 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po v_ij_erf_rk_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, & - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & - !$OMP v_ij_erf_rk_cst_mu_j1b, mu_erf) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & + !$OMP v_ij_erf_rk_cst_mu_j1b, mu_erf) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po int_mu = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) -! if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle +! if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle tmp += coef * (int_mu - int_coulomb) @@ -77,8 +77,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -112,13 +112,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ x_v_ij_erf_rk_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,& - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & - !$OMP x_v_ij_erf_rk_cst_mu_j1b, mu_erf) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,& + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & + !$OMP x_v_ij_erf_rk_cst_mu_j1b, mu_erf) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) -! if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle +! if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle tmp_x += coef * (ints(1) - ints_coulomb(1)) tmp_y += coef * (ints(2) - ints_coulomb(2)) @@ -175,8 +175,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -220,15 +220,15 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ v_ij_u_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & - !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -253,7 +253,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ B_center(3) = List_all_comb_b2_cent(3,1) int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) -! if(dabs(int_fit*coef) .lt. 1d-12) cycle +! if(dabs(int_fit*coef) .lt. 1d-12) cycle tmp += coef * coef_fit * int_fit @@ -280,8 +280,8 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 4698cb27..c06d64bb 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -1,17 +1,34 @@ ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b2_size] +BEGIN_PROVIDER [integer, List_all_comb_b2_size] implicit none - List_all_comb_b2_size = 2**nucl_num + PROVIDE j1b_type + + if(j1b_type .eq. 3) then + + List_all_comb_b2_size = 2**nucl_num + + elseif(j1b_type .eq. 4) then + + List_all_comb_b2_size = nucl_num + 1 + + else + + print *, 'j1b_type = ', j1b_pen, 'is not implemented' + stop + + endif + + print *, ' nb of linear terms in the envelope is ', List_all_comb_b2_size END_PROVIDER ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)] +BEGIN_PROVIDER [integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)] implicit none integer :: i, j @@ -50,57 +67,79 @@ END_PROVIDER List_all_comb_b2_expo = 0.d0 List_all_comb_b2_cent = 0.d0 - do i = 1, List_all_comb_b2_size + if(j1b_type .eq. 3) then - tmp_cent_x = 0.d0 - tmp_cent_y = 0.d0 - tmp_cent_z = 0.d0 - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - List_all_comb_b2_expo(i) += tmp_alphaj - tmp_cent_x += tmp_alphaj * nucl_coord(j,1) - tmp_cent_y += tmp_alphaj * nucl_coord(j,2) - tmp_cent_z += tmp_alphaj * nucl_coord(j,3) - enddo + do i = 1, List_all_comb_b2_size - if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle - - List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) - List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i) - List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i) - enddo - - ! --- - - do i = 1, List_all_comb_b2_size - - do j = 2, nucl_num, 1 - tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - do k = 1, j-1, 1 - tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k) - - List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & - + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & - + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + tmp_cent_x = 0.d0 + tmp_cent_y = 0.d0 + tmp_cent_z = 0.d0 + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + List_all_comb_b2_expo(i) += tmp_alphaj + tmp_cent_x += tmp_alphaj * nucl_coord(j,1) + tmp_cent_y += tmp_alphaj * nucl_coord(j,2) + tmp_cent_z += tmp_alphaj * nucl_coord(j,3) enddo + + if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + + List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) + List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i) + List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i) enddo - if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + ! --- - List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i) - enddo + do i = 1, List_all_comb_b2_size - ! --- + do j = 2, nucl_num, 1 + tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + do k = 1, j-1, 1 + tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k) - do i = 1, List_all_comb_b2_size + List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & + + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & + + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + enddo + enddo - phase = 0 - do j = 1, nucl_num - phase += List_all_comb_b2(j,i) + if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + + List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i) enddo - List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) - enddo + ! --- + + do i = 1, List_all_comb_b2_size + + phase = 0 + do j = 1, nucl_num + phase += List_all_comb_b2(j,i) + enddo + + List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) + enddo + + elseif(j1b_type .eq. 4) then + + List_all_comb_b2_coef( 1) = 1.d0 + List_all_comb_b2_expo( 1) = 0.d0 + List_all_comb_b2_cent(1:3,1) = 0.d0 + do i = 1, nucl_num + List_all_comb_b2_coef( i+1) = -1.d0 + List_all_comb_b2_expo( i+1) = j1b_pen( i) + List_all_comb_b2_cent(1,i+1) = nucl_coord(i,1) + List_all_comb_b2_cent(2,i+1) = nucl_coord(i,2) + List_all_comb_b2_cent(3,i+1) = nucl_coord(i,3) + enddo + + else + + print *, 'j1b_type = ', j1b_pen, 'is not implemented' + stop + + endif !print *, ' coeff, expo & cent of list b2' !do i = 1, List_all_comb_b2_size @@ -115,14 +154,31 @@ END_PROVIDER BEGIN_PROVIDER [ integer, List_all_comb_b3_size] implicit none + double precision :: tmp - List_all_comb_b3_size = 3**nucl_num + if(j1b_type .eq. 3) then + + List_all_comb_b3_size = 3**nucl_num + + elseif(j1b_type .eq. 4) then + + tmp = 0.5d0 * dble(nucl_num) * (dble(nucl_num) + 3.d0) + List_all_comb_b3_size = int(tmp) + 1 + + else + + print *, 'j1b_type = ', j1b_pen, 'is not implemented' + stop + + endif + + print *, ' nb of linear terms in the square of the envelope is ', List_all_comb_b3_size END_PROVIDER ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)] +BEGIN_PROVIDER [integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)] implicit none integer :: i, j, ii, jj @@ -162,7 +218,11 @@ END_PROVIDER implicit none integer :: i, j, k, phase + integer :: ii double precision :: tmp_alphaj, tmp_alphak, facto + double precision :: tmp1, tmp2, tmp3, tmp4 + double precision :: xi, yi, zi, xj, yj, zj + double precision :: dx, dy, dz, r2 provide j1b_pen @@ -170,60 +230,126 @@ END_PROVIDER List_all_comb_b3_expo = 0.d0 List_all_comb_b3_cent = 0.d0 - do i = 1, List_all_comb_b3_size + if(j1b_type .eq. 3) then - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - List_all_comb_b3_expo(i) += tmp_alphaj - List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) - List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) - List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3) + do i = 1, List_all_comb_b3_size + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) + List_all_comb_b3_expo(i) += tmp_alphaj + List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) + List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) + List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3) + + enddo + + if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) + + List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) + List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) + List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i) enddo - if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle - ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) + ! --- - List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) - List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) - List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i) - enddo + do i = 1, List_all_comb_b3_size - ! --- + do j = 2, nucl_num, 1 + tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) + do k = 1, j-1, 1 + tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k) - do i = 1, List_all_comb_b3_size + List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & + + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & + + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + enddo + enddo - do j = 2, nucl_num, 1 - tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - do k = 1, j-1, 1 - tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k) + if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle - List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & - + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & - + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i) + enddo + + ! --- + + do i = 1, List_all_comb_b3_size + + facto = 1.d0 + phase = 0 + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b3(j,i)) + + facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj)) + phase += List_all_comb_b3(j,i) + enddo + + List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) + enddo + + elseif(j1b_type .eq. 4) then + + ii = 1 + List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_expo( ii) = 0.d0 + List_all_comb_b3_cent(1:3,ii) = 0.d0 + + do i = 1, nucl_num + ii = ii + 1 + List_all_comb_b3_coef( ii) = -2.d0 + List_all_comb_b3_expo( ii) = j1b_pen( i) + List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) + List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) + List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) + enddo + + do i = 1, nucl_num + ii = ii + 1 + List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_expo( ii) = 2.d0 * j1b_pen(i) + List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) + List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) + List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) + enddo + + do i = 1, nucl_num-1 + + tmp1 = j1b_pen(i) + + xi = nucl_coord(i,1) + yi = nucl_coord(i,2) + zi = nucl_coord(i,3) + + do j = i+1, nucl_num + + tmp2 = j1b_pen(j) + tmp3 = tmp1 + tmp2 + tmp4 = 1.d0 / tmp3 + + xj = nucl_coord(j,1) + yj = nucl_coord(j,2) + zj = nucl_coord(j,3) + + dx = xi - xj + dy = yi - yj + dz = zi - zj + r2 = dx*dx + dy*dy + dz*dz + + ii = ii + 1 + List_all_comb_b3_coef( ii) = dexp(-tmp1*tmp2*tmp4*r2) + List_all_comb_b3_expo( ii) = tmp3 + List_all_comb_b3_cent(1,ii) = tmp4 * (tmp1 * xi + tmp2 * xj) + List_all_comb_b3_cent(2,ii) = tmp4 * (tmp1 * yi + tmp2 * yj) + List_all_comb_b3_cent(3,ii) = tmp4 * (tmp1 * zi + tmp2 * zj) enddo enddo - if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + else - List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i) - enddo + print *, 'j1b_type = ', j1b_pen, 'is not implemented' + stop - ! --- - - do i = 1, List_all_comb_b3_size - - facto = 1.d0 - phase = 0 - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b3(j,i)) - - facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj)) - phase += List_all_comb_b3(j,i) - enddo - - List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) - enddo + endif !print *, ' coeff, expo & cent of list b3' !do i = 1, List_all_comb_b3_size diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index ea4bd36c..c93f62de 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -267,7 +267,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid) ] +BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid)] implicit none integer :: ipoint, i, j diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index c9f62b18..63a9db4b 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -8,79 +8,160 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] double precision :: x, y, z, dx, dy, dz double precision :: a, d, e, fact_r - do ipoint = 1, n_points_final_grid + if(j1b_type .eq. 3) then - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + ! v(r) = \Pi_{a} [1 - \exp(-\alpha_a (r - r_a)^2)] - fact_r = 1.d0 - do j = 1, nucl_num - a = j1b_pen(j) - dx = x - nucl_coord(j,1) - dy = y - nucl_coord(j,2) - dz = z - nucl_coord(j,3) - d = dx*dx + dy*dy + dz*dz - e = 1.d0 - dexp(-a*d) + do ipoint = 1, n_points_final_grid - fact_r = fact_r * e + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_r = 1.d0 + do j = 1, nucl_num + a = j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + d = dx*dx + dy*dy + dz*dz + e = 1.d0 - dexp(-a*d) + + fact_r = fact_r * e + enddo + + v_1b(ipoint) = fact_r enddo - v_1b(ipoint) = fact_r - enddo + elseif(j1b_type .eq. 4) then + + ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_r = 1.d0 + do j = 1, nucl_num + a = j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + d = dx*dx + dy*dy + dz*dz + + fact_r = fact_r - dexp(-a*d) + enddo + + v_1b(ipoint) = fact_r + enddo + + else + + print*, 'j1b_type = ', j1b_pen, 'is not implemented' + stop + + endif END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, v_1b_grad, (3, n_points_final_grid)] +BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] implicit none integer :: ipoint, i, j, phase - double precision :: x, y, z, dx, dy, dz + double precision :: x, y, z, dx, dy, dz, r2 double precision :: a, d, e double precision :: fact_x, fact_y, fact_z double precision :: ax_der, ay_der, az_der, a_expo - do ipoint = 1, n_points_final_grid + PROVIDE j1b_type - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + if(j1b_type .eq. 3) then - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, List_all_comb_b2_size + ! v(r) = \Pi_{a} [1 - \exp(-\alpha_a (r - r_a)^2)] - phase = 0 - a_expo = 0.d0 - ax_der = 0.d0 - ay_der = 0.d0 - az_der = 0.d0 + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + v_1b_grad(1,ipoint) = fact_x + v_1b_grad(2,ipoint) = fact_y + v_1b_grad(3,ipoint) = fact_z + enddo + + elseif(j1b_type .eq. 4) then + + ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 do j = 1, nucl_num - a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) dy = y - nucl_coord(j,2) dz = z - nucl_coord(j,3) - - phase += List_all_comb_b2(j,i) - a_expo += a * (dx*dx + dy*dy + dz*dz) - ax_der += a * dx - ay_der += a * dy - az_der += a * dz - enddo - e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + r2 = dx*dx + dy*dy + dz*dz - fact_x += e * ax_der - fact_y += e * ay_der - fact_z += e * az_der + a = j1b_pen(j) + e = a * dexp(-a * r2) + + ax_der += e * dx + ay_der += e * dy + az_der += e * dz + enddo + + v_1b_grad(1,ipoint) = 2.d0 * ax_der + v_1b_grad(2,ipoint) = 2.d0 * ay_der + v_1b_grad(3,ipoint) = 2.d0 * az_der enddo - v_1b_grad(1,ipoint) = fact_x - v_1b_grad(2,ipoint) = fact_y - v_1b_grad(3,ipoint) = fact_z - enddo + else + + print*, 'j1b_type = ', j1b_pen, 'is not implemented' + stop + + endif END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 1a86a2e7..17c24d4b 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -68,7 +68,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL - elseif(j1b_type .eq. 3) then + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b @@ -219,7 +219,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL - elseif(j1b_type .eq. 3) then + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 From 6d4975041201aab26f3dd6a4f019b6eeaa189822 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 7 May 2023 18:52:03 +0200 Subject: [PATCH 076/337] rotate lr angles separately --- src/tc_scf/minimize_tc_angles.irp.f | 4 ++++ src/tc_scf/tc_scf.irp.f | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tc_scf/minimize_tc_angles.irp.f b/src/tc_scf/minimize_tc_angles.irp.f index 1363e62b..5d3ff7f0 100644 --- a/src/tc_scf/minimize_tc_angles.irp.f +++ b/src/tc_scf/minimize_tc_angles.irp.f @@ -8,6 +8,10 @@ program print_angles my_n_pt_a_grid = 50 touch my_n_pt_r_grid my_n_pt_a_grid ! call sort_by_tc_fock + + ! TODO + ! check if rotations of orbitals affect the TC energy + ! and refuse the step call minimize_tc_orb_angles end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 88ddd26c..2485ee8b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -53,7 +53,7 @@ program tc_scf stop endif - call minimize_tc_orb_angles() + !call minimize_tc_orb_angles() endif From b9732d78dea26e5abf850370f275ab68d8ff481e Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 8 May 2023 23:31:20 +0200 Subject: [PATCH 077/337] IPP astice: OK --- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 2 +- src/non_h_ints_mu/debug_fit.irp.f | 156 ++++++- src/non_h_ints_mu/grad_squared.irp.f | 65 ++- src/non_h_ints_mu/j12_nucl_utils.irp.f | 54 ++- src/non_h_ints_mu/jast_deriv.irp.f | 190 +++++++- src/non_h_ints_mu/tc_integ.irp.f | 9 +- src/non_h_ints_mu/test_non_h_ints.irp.f | 458 ++++++++++++++++++- src/tc_scf/tc_scf_energy.irp.f | 2 +- 8 files changed, 894 insertions(+), 42 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index fb4d71f3..7c68de75 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -164,7 +164,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index 5995bffa..547fd198 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -18,13 +18,13 @@ program debug_fit PROVIDE mu_erf j1b_pen !call test_j1b_nucl() - call test_grad_j1b_nucl() + !call test_grad_j1b_nucl() !call test_lapl_j1b_nucl() !call test_list_b2() - !call test_list_b3() + call test_list_b3() - call test_fit_u() + !call test_fit_u() !call test_fit_u2() !call test_fit_ugradu() @@ -236,16 +236,25 @@ subroutine test_list_b3() integer :: ipoint double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_tmp, i_num, normalz double precision :: r(3) - double precision, external :: j1b_nucl + double precision :: grad_num(3), eps_der, eps_lap, tmp_der, tmp_lap, i0, ip, im + double precision, external :: j1b_nucl_square print*, ' test_list_b3 ...' + eps_ij = 1d-7 + + eps_der = 1d-5 + tmp_der = 0.5d0 / eps_der + + eps_lap = 1d-4 + tmp_lap = 1.d0 / (eps_lap*eps_lap) + + ! --- + PROVIDE v_1b_list_b3 - eps_ij = 1d-7 acc_tot = 0.d0 normalz = 0.d0 - do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -253,8 +262,7 @@ subroutine test_list_b3() r(3) = final_grid_points(3,ipoint) i_exc = v_1b_list_b3(ipoint) - i_tmp = j1b_nucl(r) - i_num = i_tmp * i_tmp + i_num = j1b_nucl_square(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in list_b3 on', ipoint @@ -267,8 +275,136 @@ subroutine test_list_b3() normalz += dabs(i_num) enddo - print*, ' acc_tot = ', acc_tot - print*, ' normalz = ', normalz + print*, ' acc_tot on val = ', acc_tot + print*, ' normalz on val = ', normalz + + ! --- + + PROVIDE v_1b_square_grad + + acc_tot = 0.d0 + normalz = 0.d0 + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + + i_exc = v_1b_square_grad(ipoint,1) + r(1) = r(1) + eps_der + ip = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(1) = r(1) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad_x list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + i_exc = v_1b_square_grad(ipoint,2) + r(2) = r(2) + eps_der + ip = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(2) = r(2) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad_y list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + i_exc = v_1b_square_grad(ipoint,3) + r(3) = r(3) + eps_der + ip = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(3) = r(3) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad_z list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + + print*, ' acc_tot on grad = ', acc_tot + print*, ' normalz on grad = ', normalz + + ! --- + + PROVIDE v_1b_square_lapl + + acc_tot = 0.d0 + normalz = 0.d0 + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + i0 = j1b_nucl_square(r) + + i_exc = v_1b_square_lapl(ipoint) + + r(1) = r(1) + eps_lap + ip = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(1) = r(1) + eps_lap + i_num = tmp_lap * (ip - 2.d0 * i0 + im) + + r(2) = r(2) + eps_lap + ip = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(2) = r(2) + eps_lap + i_num = i_num + tmp_lap * (ip - 2.d0 * i0 + im) + + r(3) = r(3) + eps_lap + ip = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(3) = r(3) + eps_lap + i_num = i_num + tmp_lap * (ip - 2.d0 * i0 + im) + + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in lapl list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + + print*, ' acc_tot on lapl = ', acc_tot + print*, ' normalz on lapl = ', normalz + + ! --- return end subroutine test_list_b3 diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index c93f62de..b4ddb606 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -17,7 +17,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! ! if J(r1,r2) = u12 x v1 x v2 ! - ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] + ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 @@ -358,7 +358,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao implicit none integer :: ipoint, i, j, k, l - double precision :: weight1, ao_ik_r, ao_i_r + double precision :: weight1, ao_k_r, ao_i_r + double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1 double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:) @@ -373,16 +374,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao else + ! --- + PROVIDE int2_grad1_u12_square_ao allocate(b_mat(n_points_final_grid,ao_num,ao_num)) b_mat = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid @@ -390,13 +393,57 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) + + ! --- + + if((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + ! an additional term is added here directly instead of + ! being added in int2_grad1_u12_square_ao for performance + ! note that the factor + + PROVIDE int2_u2_j1b2 + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector, & + !$OMP v_1b_square_grad, v_1b_square_lapl, aos_grad_in_r_array_transp_bis) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) + + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i) = weight1 * ( ao_k_r * ao_i_r * v_1b_square_lapl(ipoint) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_grad_square_ao, ao_num*ao_num) + endif + + ! --- + deallocate(b_mat) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index 63a9db4b..079cb388 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -59,7 +59,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] else - print*, 'j1b_type = ', j1b_pen, 'is not implemented' + print*, 'j1b_type = ', j1b_pen, 'is not implemented for v_1b' stop endif @@ -172,7 +172,7 @@ BEGIN_PROVIDER [ double precision, v_1b_lapl, (n_points_final_grid)] implicit none integer :: ipoint, i, j, phase double precision :: x, y, z, dx, dy, dz - double precision :: a, d, e, b + double precision :: a, e, b double precision :: fact_r double precision :: ax_der, ay_der, az_der, a_expo @@ -283,6 +283,56 @@ BEGIN_PROVIDER [ double precision, v_1b_list_b3, (n_points_final_grid)] END_PROVIDER +! --- + + BEGIN_PROVIDER [double precision, v_1b_square_grad, (n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, v_1b_square_lapl, (n_points_final_grid) ] + + implicit none + integer :: ipoint, i + double precision :: x, y, z, dx, dy, dz, r2 + double precision :: coef, expo, a_expo, tmp + double precision :: fact_x, fact_y, fact_z, fact_r + + PROVIDE List_all_comb_b3_coef List_all_comb_b3_expo List_all_comb_b3_cent + + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + fact_r = 0.d0 + do i = 1, List_all_comb_b3_size + + coef = List_all_comb_b3_coef(i) + expo = List_all_comb_b3_expo(i) + + dx = x - List_all_comb_b3_cent(1,i) + dy = y - List_all_comb_b3_cent(2,i) + dz = z - List_all_comb_b3_cent(3,i) + r2 = dx * dx + dy * dy + dz * dz + + a_expo = expo * r2 + tmp = coef * expo * dexp(-a_expo) + + fact_x += tmp * dx + fact_y += tmp * dy + fact_z += tmp * dz + fact_r += tmp * (3.d0 - 2.d0 * a_expo) + enddo + + v_1b_square_grad(ipoint,1) = -2.d0 * fact_x + v_1b_square_grad(ipoint,2) = -2.d0 * fact_y + v_1b_square_grad(ipoint,3) = -2.d0 * fact_z + v_1b_square_lapl(ipoint) = -2.d0 * fact_r + enddo + +END_PROVIDER + ! --- double precision function j12_mu_r12(r12) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 4cfd13d2..cbd0b406 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -164,7 +164,7 @@ double precision function j12_mu(r1, r2) double precision, intent(in) :: r1(3), r2(3) double precision :: mu_tmp, r12 - if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + (r1(2) - r2(2)) * (r1(2) - r2(2)) & @@ -175,7 +175,7 @@ double precision function j12_mu(r1, r2) else - print *, ' j1b_type = ', j1b_type, 'not implemented yet' + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' stop endif @@ -196,7 +196,7 @@ subroutine grad1_j12_mu(r1, r2, grad) grad = 0.d0 - if((j1b_type .ge. 100) .and. (j1b_type .lt. 200)) then + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then dx = r1(1) - r2(1) dy = r1(2) - r2(2) @@ -252,7 +252,7 @@ double precision function j1b_nucl(r) integer :: i double precision :: a, d, e, x, y, z - if(j1b_type .eq. 102) then + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then j1b_nucl = 1.d0 do i = 1, nucl_num @@ -263,7 +263,7 @@ double precision function j1b_nucl(r) j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) enddo - elseif(j1b_type .eq. 103) then + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then j1b_nucl = 1.d0 do i = 1, nucl_num @@ -275,7 +275,7 @@ double precision function j1b_nucl(r) j1b_nucl = j1b_nucl * e enddo - elseif(j1b_type .eq. 104) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then j1b_nucl = 1.d0 do i = 1, nucl_num @@ -286,7 +286,7 @@ double precision function j1b_nucl(r) j1b_nucl = j1b_nucl - dexp(-a*d) enddo - elseif(j1b_type .eq. 105) then + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then j1b_nucl = 1.d0 do i = 1, nucl_num @@ -300,7 +300,7 @@ double precision function j1b_nucl(r) else - print *, ' j1b_type = ', j1b_type, 'not implemented yet' + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' stop endif @@ -310,6 +310,75 @@ end function j1b_nucl ! --- +double precision function j1b_nucl_square(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square * e + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' + stop + + endif + + return +end function j1b_nucl_square + +! --- + subroutine grad1_j1b_nucl(r, grad) implicit none @@ -321,7 +390,7 @@ subroutine grad1_j1b_nucl(r, grad) double precision :: fact_x, fact_y, fact_z double precision :: ax_der, ay_der, az_der, a_expo - if(j1b_type .eq. 102) then + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then fact_x = 0.d0 fact_y = 0.d0 @@ -343,7 +412,7 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = fact_y grad(3) = fact_z - elseif(j1b_type .eq. 103) then + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then x = r(1) y = r(2) @@ -382,7 +451,7 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = fact_y grad(3) = fact_z - elseif(j1b_type .eq. 104) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then fact_x = 0.d0 fact_y = 0.d0 @@ -404,7 +473,7 @@ subroutine grad1_j1b_nucl(r, grad) grad(2) = 2.d0 * fact_y grad(3) = 2.d0 * fact_z - elseif(j1b_type .eq. 105) then + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then fact_x = 0.d0 fact_y = 0.d0 @@ -428,7 +497,7 @@ subroutine grad1_j1b_nucl(r, grad) else - print *, ' j1b_type = ', j1b_type, 'not implemented yet' + print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' stop endif @@ -520,3 +589,98 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) end subroutine mu_r_val_and_grad ! --- + +subroutine grad1_j1b_nucl_square_num(r1, grad) + + implicit none + double precision, intent(in) :: r1(3) + double precision, intent(out) :: grad(3) + double precision :: r(3), eps, tmp_eps, vp, vm + double precision, external :: j1b_nucl_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j1b_nucl_square_num + +! --- + +subroutine grad1_j12_mu_square_num(r1, r2, grad) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: r(3) + double precision :: eps, tmp_eps, vp, vm + double precision, external :: j12_mu_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j12_mu_square(r, r2) + r(1) = r(1) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j12_mu_square(r, r2) + r(2) = r(2) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j12_mu_square(r, r2) + r(3) = r(3) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j12_mu_square_num + +! --- + +double precision function j12_mu_square(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu + + j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) + + return +end function j12_mu_square + +! --- + diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 17c24d4b..efa9be43 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -221,18 +221,21 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance + !PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + PROVIDE u12sq_j1bsq grad12_j12 int2_grad1_u12_square_ao = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid do j = 1, ao_num do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + !int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) enddo enddo enddo diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index c535d0c5..a6e0a311 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -1,15 +1,25 @@ program test_non_h - implicit none + + implicit none + my_grid_becke = .True. my_n_pt_r_grid = 50 my_n_pt_a_grid = 74 + !my_n_pt_r_grid = 400 + !my_n_pt_a_grid = 974 + ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid -!call routine_grad_squared - call routine_fit + + !call routine_grad_squared + !call routine_fit + + call test_ipp() end +! --- + subroutine routine_lapl_grad implicit none integer :: i,j,k,l @@ -100,3 +110,445 @@ subroutine routine_fit enddo end + + +subroutine test_ipp() + + implicit none + integer :: i, j, k, l, ipoint + double precision :: accu, norm, diff, old, new, eps, int_num + double precision :: weight1, ao_i_r, ao_k_r + double precision, allocatable :: b_mat(:,:,:), I1(:,:,:,:), I2(:,:,:,:) + + eps = 1d-7 + + allocate(b_mat(n_points_final_grid,ao_num,ao_num)) + b_mat = 0.d0 + + ! --- + + ! first way + + allocate(I1(ao_num,ao_num,ao_num,ao_num)) + I1 = 0.d0 + + PROVIDE u12_grad1_u12_j1b_grad1_j1b + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , u12_grad1_u12_j1b_grad1_j1b(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 0.d0, I1, ao_num*ao_num) + + ! --- + + ! 2nd way + + allocate(I2(ao_num,ao_num,ao_num,ao_num)) + I2 = 0.d0 + + PROVIDE int2_u2_j1b2 + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector, & + !$OMP v_1b_square_grad, v_1b_square_lapl, aos_grad_in_r_array_transp_bis) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) + + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i) = weight1 * ( ao_k_r * ao_i_r * v_1b_square_lapl(ipoint) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 0.d0, I2, ao_num*ao_num) + + ! --- + + deallocate(b_mat) + + accu = 0.d0 + norm = 0.d0 + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + + old = I1(j,l,k,i) + new = I2(j,l,k,i) + + !print*, l, k, j, i + !print*, old, new + + diff = new - old + if(dabs(diff) .gt. eps) then + print*, ' problem on :', j, l, k, i + print*, ' diff = ', diff + print*, ' old value = ', old + print*, ' new value = ', new + call I_grade_gradu_naive1(i, j, k, l, int_num) + print*, ' full num1 = ', int_num + call I_grade_gradu_naive2(i, j, k, l, int_num) + print*, ' full num2 = ', int_num + call I_grade_gradu_naive3(i, j, k, l, int_num) + print*, ' full num3 = ', int_num + call I_grade_gradu_naive4(i, j, k, l, int_num) + print*, ' full num4 = ', int_num + call I_grade_gradu_seminaive(i, j, k, l, int_num) + print*, ' semi num = ', int_num + endif + + accu += dabs(diff) + norm += dabs(old) + enddo + enddo + enddo + enddo + + deallocate(I1, I2) + + print*, ' accu = ', accu + print*, ' norm = ', norm + + return +end subroutine test_ipp + +! --- + +subroutine I_grade_gradu_naive1(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1_x, weight1_y, weight1_z + double precision :: weight2_x, weight2_y, weight2_z + double precision :: aor_i, aor_j, aor_k, aor_l + double precision :: e1_val, e2_val, e1_der(3), u12_val, u12_der(3) + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + e1_val = j1b_nucl(r1) + call grad1_j1b_nucl(r1, e1_der) + + weight1_x = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(1) + weight1_y = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(2) + weight1_z = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + + u12_val = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, u12_der) + + weight2_x = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(1) + weight2_y = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(2) + weight2_z = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(3) + + int = int - (weight1_x * weight2_x + weight1_y * weight2_y + weight1_z * weight2_z) + enddo + enddo + + return +end subroutine I_grade_gradu_naive1 + +! --- + +subroutine I_grade_gradu_naive2(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1_x, weight1_y, weight1_z + double precision :: weight2_x, weight2_y, weight2_z + double precision :: aor_i, aor_j, aor_k, aor_l + double precision :: e1_square_der(3), e2_val, u12_square_der(3) + double precision, external :: j1b_nucl + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + call grad1_j1b_nucl_square_num(r1, e1_square_der) + + weight1_x = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(1) + weight1_y = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(2) + weight1_z = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + call grad1_j12_mu_square_num(r1, r2, u12_square_der) + + weight2_x = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(1) + weight2_y = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(2) + weight2_z = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(3) + + int = int - 0.25d0 * (weight1_x * weight2_x + weight1_y * weight2_y + weight1_z * weight2_z) + enddo + enddo + + return +end subroutine I_grade_gradu_naive2 + +! --- + +subroutine I_grade_gradu_naive3(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1, weight2 + double precision :: aor_j, aor_l + double precision :: grad(3), e2_val, u12_val + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + call grad1_aos_ik_grad1_esquare(i, k, r1, grad) + + weight1 = final_weight_at_r_vector(ipoint) * (grad(1) + grad(2) + grad(3)) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + u12_val = j12_mu(r1, r2) + + weight2 = aor_j * aor_l * e2_val * e2_val * u12_val * u12_val * final_weight_at_r_vector_extra(jpoint) + + int = int + 0.25d0 * weight1 * weight2 + enddo + enddo + + return +end subroutine I_grade_gradu_naive3 + +! --- + +subroutine I_grade_gradu_naive4(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1, weight2 + double precision :: aor_j, aor_l, aor_k, aor_i + double precision :: grad(3), e2_val, u12_val + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + weight1 = final_weight_at_r_vector(ipoint) * ( aor_k * aor_i * v_1b_square_lapl(ipoint) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,1) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,2) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,3) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + u12_val = j12_mu(r1, r2) + + weight2 = aor_j * aor_l * e2_val * e2_val * u12_val * u12_val * final_weight_at_r_vector_extra(jpoint) + + int = int + 0.25d0 * weight1 * weight2 + enddo + enddo + + return +end subroutine I_grade_gradu_naive4 + +! --- + +subroutine I_grade_gradu_seminaive(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint + double precision :: r1(3) + double precision :: weight1 + double precision :: aor_i, aor_k + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) * ( aor_k * aor_i * v_1b_square_lapl(ipoint) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,1) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,2) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,3) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + + int = int + weight1 * int2_u2_j1b2(j,l,ipoint) + enddo + + return +end subroutine I_grade_gradu_seminaive + +! --- + +subroutine aos_ik_grad1_esquare(i, k, r1, val) + + implicit none + integer, intent(in) :: i, k + double precision, intent(in) :: r1(3) + double precision, intent(out) :: val(3) + double precision :: tmp + double precision :: der(3), aos_array(ao_num), aos_grad_array(3,ao_num) + + call give_all_aos_and_grad_at_r(r1, aos_array, aos_grad_array) + call grad1_j1b_nucl_square_num(r1, der) + + tmp = aos_array(i) * aos_array(k) + val(1) = tmp * der(1) + val(2) = tmp * der(2) + val(3) = tmp * der(3) + + return +end subroutine phi_ik_grad1_esquare + +! --- + +subroutine grad1_aos_ik_grad1_esquare(i, k, r1, grad) + + implicit none + integer, intent(in) :: i, k + double precision, intent(in) :: r1(3) + double precision, intent(out) :: grad(3) + double precision :: r(3), eps, tmp_eps, val_p(3), val_m(3) + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + call aos_ik_grad1_esquare(i, k, r, val_p) + r(1) = r(1) - 2.d0 * eps + call aos_ik_grad1_esquare(i, k, r, val_m) + r(1) = r(1) + eps + grad(1) = tmp_eps * (val_p(1) - val_m(1)) + + r(2) = r(2) + eps + call aos_ik_grad1_esquare(i, k, r, val_p) + r(2) = r(2) - 2.d0 * eps + call aos_ik_grad1_esquare(i, k, r, val_m) + r(2) = r(2) + eps + grad(2) = tmp_eps * (val_p(2) - val_m(2)) + + r(3) = r(3) + eps + call aos_ik_grad1_esquare(i, k, r, val_p) + r(3) = r(3) - 2.d0 * eps + call aos_ik_grad1_esquare(i, k, r, val_m) + r(3) = r(3) + eps + grad(3) = tmp_eps * (val_p(3) - val_m(3)) + + return +end subroutine grad1_aos_ik_grad1_esquare + +! --- + + + + + + + diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index 5c643f19..833b48aa 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -1,5 +1,5 @@ - BEGIN_PROVIDER [ double precision, TC_HF_energy] + BEGIN_PROVIDER [ double precision, TC_HF_energy ] &BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy] &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy] From 2d06e8fdaefff2905933a0b5ff52e93f8193f0a7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 9 May 2023 10:52:36 +0200 Subject: [PATCH 078/337] Cholesky false by default --- external/qp2-dependencies | 2 +- src/ao_two_e_ints/EZFIO.cfg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index e0d0e02e..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index caed4698..4ab080ec 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -22,4 +22,4 @@ ezfio_name: direct type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: True +default: False From 79182e30230eebd7a1ba651539b497bcc3b8892a Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 9 May 2023 15:49:34 +0200 Subject: [PATCH 079/337] added use_ipp keyword --- src/non_h_ints_mu/grad_squared.irp.f | 2 +- src/non_h_ints_mu/tc_integ.irp.f | 58 +++++++++++++++++++--------- src/tc_keywords/EZFIO.cfg | 5 +++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index b4ddb606..debca4ce 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -403,7 +403,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao ! --- - if((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index efa9be43..d5995ae5 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -219,28 +219,50 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance - !PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - PROVIDE u12sq_j1bsq grad12_j12 + if(use_ipp) then - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - !int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance + PROVIDE u12sq_j1bsq grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + else + + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif elseif(j1b_type .ge. 100) then diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 41c07d0b..c2d1a048 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -220,4 +220,9 @@ doc: If |true|, save the bi-ortho wave functions in a sorted way interface: ezfio,provider,ocaml default: True +[use_ipp] +type: logical +doc: If |true|, use Manu IPP for j1b_type=3,4 +interface: ezfio,provider,ocaml +default: True From f314c5abc291144eab0d76a591e73166ce90fa05 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 11:14:57 +0200 Subject: [PATCH 080/337] Added qp_json --- scripts/qp_exc_energy.py | 18 ++++++----- scripts/utility/qp_json.py | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 scripts/utility/qp_json.py diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index ba9d7917..7e7f1d67 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -42,13 +42,15 @@ import sys, os import scipy import scipy.stats from math import sqrt, gamma, exp -import json +import qp_json -def read_data(filename,state): +def read_data(ezfio_filename,state): """ Read energies and PT2 from input file """ - with open(filename,'r') as f: - lines = json.load(f)['fci'] + data = qp_json.load_last(ezfio_filename) + for method in data.keys(): + x = data[method] + lines = x print(f"State: {state}") @@ -138,15 +140,15 @@ def compute(data): return mu, err, bias, p -filename = sys.argv[1] -print(filename) +ezfio_filename = sys.argv[1] +print(ezfio_filename) if len(sys.argv) > 2: state = int(sys.argv[2]) else: state = 1 -data = read_data(filename,state) +data = read_data(ezfio_filename,state) mu, err, bias, _ = compute(data) -print(" %s: %8.3f +/- %5.3f eV\n"%(filename, mu, err)) +print(" %s: %8.3f +/- %5.3f eV\n"%(ezfio_filename, mu, err)) import numpy as np A = np.array( [ [ data[-1][1], 1. ], diff --git a/scripts/utility/qp_json.py b/scripts/utility/qp_json.py new file mode 100644 index 00000000..09ffe1be --- /dev/null +++ b/scripts/utility/qp_json.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +import os +import json + +def fix_json(s): + """Properly termitates an incomplete JSON file""" + + s = s.replace(' ','') + s = s.replace('\n','') + s = s.replace('\t','') + s = s.replace(",{}",'') + tmp = [ c for c in s if c in "[]{}" ] + tmp = "".join(tmp) + tmp_old = "" + while tmp != tmp_old: + tmp_old = tmp + tmp = tmp.replace("{}","") + tmp = tmp.replace("[]","") + while s[-1] in [ ',', '\n', ' ', '\t' ]: + s = s[:-1] + tmp = [ c for c in tmp ] + tmp.reverse() + for c in tmp: + if c == '[': s += "]" + elif c == '{': s += "}" + return s + + +def load(filename): + """Loads a JSON file after calling the fix_json function.""" + with open(filename,'r') as f: + data = f.read() + new_data = fix_json(data) + return json.loads(new_data) + + +def load_all(ezfio_filename): + """Loads all JSON files of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + for filename in [ x for x in os.listdir(prefix) if x.endswith(".json")]: + d[filename] = load(prefix+filename) + return d + + +def load_last(ezfio_filename): + """Loads last JSON file of an EZFIO.""" + d = {} + prefix = ezfio_filename+'/json/' + l = [ x for x in os.listdir(prefix) if x.endswith(".json")] + l.sort() + filename = l[-1] + print(filename) + return load(prefix+filename) + + +def fix(ezfio_filename): + """Fixes all JSON files in an EZFIO.""" + d = load_all(ezfio_filename) + prefix = ezfio_filename+'/json/' + for filename in d.keys(): + with open(prefix+filename, 'w') as json_file: + json.dump(d[filename], json_file) + + + From 20bed4f44a85e5717ac535cd98360d55119518ac Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:22:56 +0200 Subject: [PATCH 081/337] Fix reversed print of minimum PT2 in extrapolations --- src/iterations/print_extrapolation.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index 111429bf..a7f85693 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -25,7 +25,7 @@ subroutine print_extrapolated_energy write(*,*) 'minimum PT2 ', 'Extrapolated energy' write(*,*) '=========== ', '===================' do k=2,N_iter_p - write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,k), extrapolated_energy(k,1) + write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter_p+1-k), extrapolated_energy(k,1) enddo write(*,*) '=========== ', '===================' From 52da1de877934f398d76d557457a0b1ae5c8e345 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 12:54:02 +0200 Subject: [PATCH 082/337] qp_extract_cipsi_data.py uses qp_json --- scripts/qp_extract_cipsi_data.py | 80 +++++++++++++------------------- 1 file changed, 31 insertions(+), 49 deletions(-) diff --git a/scripts/qp_extract_cipsi_data.py b/scripts/qp_extract_cipsi_data.py index 70935d73..200ab7aa 100755 --- a/scripts/qp_extract_cipsi_data.py +++ b/scripts/qp_extract_cipsi_data.py @@ -1,55 +1,37 @@ #!/usr/bin/env python3 -import re +import qp_json import sys -# Read output file -with open(sys.argv[1], 'r') as file: - output = file.read() +if len(sys.argv) == 1: + print(f"syntax: {sys.argv[0]} EZFIO_FILE") + +d = qp_json.load_all(sys.argv[1]) + +k = [ x for x in d.keys() ] +k.sort() + +print("# Energy PT2 PT2_err rPT2 rPT2_err exFCI\n") +for f in k: + try: + j = d[f]["fci"] + except: + continue + + print(f"# {f}") + for e in j: + + out = f" {e['n_det']:8d}" + + nstates = len(e["states"]) + for ee in e["states"]: + try: + exc_energy = ee['ex_energy'][0] + except: + exc_energy = 0. + out += f" {ee['energy']:16.8f} {ee['pt2']:e} {ee['pt2_err']:e} {ee['rpt2']:e} {ee['rpt2_err']:e} {exc_energy:16.8f}" + print(out) + + print("\n") -def extract_data(output): - lines = output.split("\n") - data = [] - - n_det = None - e = None - pt2 = None - err_pt2 = None - rpt2 = None - err_rpt2 = None - e_ex = None - - - reading = False - for iline, line in enumerate(lines): - if line.startswith("Summary at N_det"): - reading = False - - if not reading and line.startswith(" N_det "): - n_det = int(re.search(r"N_det\s+=\s+(\d+)", line).group(1)) - reading = True - - if reading: - if line.startswith(" E "): - e = float(re.search(r"E\s+=\s+(-?\d+\.\d+)", line).group(1)) - elif line.startswith(" PT2 "): - pt2 = float(re.search(r"PT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_pt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif line.startswith(" rPT2 "): - rpt2 = float(re.search(r"rPT2\s+=\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - err_rpt2 = float(re.search(r"\+/-\s+(-?\d+\.\d+E?.\d*)", line).group(1)) - elif "minimum PT2 Extrapolated energy" in line: - e_ex_line = lines[iline+2] - e_ex = float(e_ex_line.split()[1]) - reading = False - new_data = " {:8d} {:16.8f} {:e} {:e} {:e} {:e} {:16.8f}".format(n_det, e, pt2, err_pt2, rpt2, err_rpt2, e_ex) - data.append(new_data) - n_det = e = pt2 = err_pt2 = rpt2 = err_rpt2 = e_ex = None - - return data - -data = extract_data(output) - -for item in data: - print(item) From 46e3faed3cdc90fa3c6a82bbd40655378b438ea7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 14:44:45 +0200 Subject: [PATCH 083/337] allow no basis set --- data/basis/none | 5 + ocaml/Input_ao_basis.ml | 8 +- ocaml/qp_create_ezfio.ml | 262 ++++++++++++----------- scripts/ezfio_interface/qp_edit_template | 36 ++-- src/ao_two_e_ints/EZFIO.cfg | 2 +- src/mo_basis/utils.irp.f | 5 +- 6 files changed, 161 insertions(+), 157 deletions(-) create mode 100644 data/basis/none diff --git a/data/basis/none b/data/basis/none new file mode 100644 index 00000000..df5d59f1 --- /dev/null +++ b/data/basis/none @@ -0,0 +1,5 @@ +$DATA + +HYDROGEN + +$END diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 95d37a7a..841089ea 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -247,8 +247,7 @@ end = struct let read () = - if (Ezfio.has_ao_basis_ao_basis ()) then - begin + try let result = { ao_basis = read_ao_basis (); ao_num = read_ao_num () ; @@ -267,9 +266,8 @@ end = struct |> MD5.to_string |> Ezfio.set_ao_basis_ao_md5 ; Some result - end - else - None + with + | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) ;; diff --git a/ocaml/qp_create_ezfio.ml b/ocaml/qp_create_ezfio.ml index 4583b118..8e452762 100644 --- a/ocaml/qp_create_ezfio.ml +++ b/ocaml/qp_create_ezfio.ml @@ -478,6 +478,7 @@ let run ?o b au c d m p cart xyz_file = let nmax = Nucl_number.get_max () in + let rec do_work (accu:(Atom.t*Nucl_number.t) list) (n:int) = function | [] -> accu | e::tail -> @@ -520,141 +521,144 @@ let run ?o b au c d m p cart xyz_file = in let long_basis = Long_basis.of_basis basis in let ao_num = List.length long_basis in - Ezfio.set_ao_basis_ao_num ao_num; - Ezfio.set_ao_basis_ao_basis b; - Ezfio.set_basis_basis b; - let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis - and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis - and ao_power= - let l = list_map (fun (x,_,_) -> x) long_basis in - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ - (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) - in - let ao_prim_num_max = List.fold_left (fun s x -> - if x > s then x - else s) 0 ao_prim_num - in - let gtos = - list_map (fun (_,x,_) -> x) long_basis - in - - let create_expo_coef ec = - let coefs = - begin match ec with - | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> - AO_coef.to_float coef) x.Gto.lc) gtos - | `Expos -> list_map (fun x-> - list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc) gtos - end + if ao_num > 0 then + begin + Ezfio.set_ao_basis_ao_num ao_num; + Ezfio.set_ao_basis_ao_basis b; + Ezfio.set_basis_basis b; + let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc) long_basis + and ao_nucl = list_map (fun (_,_,n) -> Nucl_number.to_int n) long_basis + and ao_power= + let l = list_map (fun (x,_,_) -> x) long_basis in + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.x)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.y)) l)@ + (list_map (fun t -> Positive_int.to_int Angmom.Xyz.(t.z)) l) in - let rec get_n n accu = function - | [] -> List.rev accu - | h::tail -> - let y = - begin match List.nth_opt h n with - | Some x -> x - | None -> 0. + let ao_prim_num_max = List.fold_left (fun s x -> + if x > s then x + else s) 0 ao_prim_num + in + let gtos = + list_map (fun (_,x,_) -> x) long_basis + in + + let create_expo_coef ec = + let coefs = + begin match ec with + | `Coefs -> list_map (fun x-> + list_map (fun (_,coef) -> + AO_coef.to_float coef) x.Gto.lc) gtos + | `Expos -> list_map (fun x-> + list_map (fun (prim,_) -> AO_expo.to_float + prim.GaussianPrimitive.expo) x.Gto.lc) gtos end - in - get_n n (y::accu) tail + in + let rec get_n n accu = function + | [] -> List.rev accu + | h::tail -> + let y = + begin match List.nth_opt h n with + | Some x -> x + | None -> 0. + end + in + get_n n (y::accu) tail + in + let rec build accu = function + | n when n=ao_prim_num_max -> accu + | n -> build ( accu @ (get_n n [] coefs) ) (n+1) + in + build [] 0 in - let rec build accu = function - | n when n=ao_prim_num_max -> accu - | n -> build ( accu @ (get_n n [] coefs) ) (n+1) - in - build [] 0 - in - let ao_coef = create_expo_coef `Coefs - and ao_expo = create_expo_coef `Expos - in - let () = - let shell_num = List.length basis in - let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = - list_map ( fun (g,_) -> g.Gto.lc ) basis - in - let ang_mom = - list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> - let x, _ = List.hd l in - Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int - ) lc - in - let expo = - list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc - |> List.concat - in - let coef = - list_map (fun l -> - list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l - ) lc - |> List.concat - in - let shell_prim_num = - list_map List.length lc - in - let shell_idx = - let rec make_list n accu = function - | 0 -> accu - | i -> make_list n (n :: accu) (i-1) + let ao_coef = create_expo_coef `Coefs + and ao_expo = create_expo_coef `Expos in - let rec aux count accu = function - | [] -> List.rev accu - | l::rest -> - let new_l = make_list count accu (List.length l) in - aux (count+1) new_l rest - in - aux 1 [] lc - in - let prim_num = List.length coef in - Ezfio.set_basis_typ "Gaussian"; - Ezfio.set_basis_shell_num shell_num; - Ezfio.set_basis_prim_num prim_num ; - Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); - Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; - Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; - Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| shell_num |] - ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) - ) ; - Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| nucl_num |] - ~data:( - list_map (fun (_,n) -> Nucl_number.to_int n) basis - |> List.fold_left (fun accu i -> - match accu with - | [] -> [(1,i)] - | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) - ) [] - |> List.rev - |> List.map fst - )) ; - Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:coef) ; - Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| prim_num |] ~data:expo) ; + let () = + let shell_num = List.length basis in + let lc : (GaussianPrimitive.t * Qptypes.AO_coef.t) list list = + list_map ( fun (g,_) -> g.Gto.lc ) basis + in + let ang_mom = + list_map (fun (l : (GaussianPrimitive.t * Qptypes.AO_coef.t) list) -> + let x, _ = List.hd l in + Angmom.to_l x.GaussianPrimitive.sym |> Qptypes.Positive_int.to_int + ) lc + in + let expo = + list_map (fun l -> list_map (fun (x,_) -> Qptypes.AO_expo.to_float x.GaussianPrimitive.expo) l ) lc + |> List.concat + in + let coef = + list_map (fun l -> + list_map (fun (_,x) -> Qptypes.AO_coef.to_float x) l + ) lc + |> List.concat + in + let shell_prim_num = + list_map List.length lc + in + let shell_idx = + let rec make_list n accu = function + | 0 -> accu + | i -> make_list n (n :: accu) (i-1) + in + let rec aux count accu = function + | [] -> List.rev accu + | l::rest -> + let new_l = make_list count accu (List.length l) in + aux (count+1) new_l rest + in + aux 1 [] lc + in + let prim_num = List.length coef in + Ezfio.set_basis_typ "Gaussian"; + Ezfio.set_basis_shell_num shell_num; + Ezfio.set_basis_prim_num prim_num ; + Ezfio.set_basis_shell_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:shell_prim_num); + Ezfio.set_basis_shell_ang_mom (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] ~data:ang_mom ) ; + Ezfio.set_basis_shell_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:shell_idx) ; + Ezfio.set_basis_basis_nucleus_index (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| shell_num |] + ~data:( list_map (fun (_,n) -> Nucl_number.to_int n) basis) + ) ; + Ezfio.set_basis_nucleus_shell_num(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| nucl_num |] + ~data:( + list_map (fun (_,n) -> Nucl_number.to_int n) basis + |> List.fold_left (fun accu i -> + match accu with + | [] -> [(1,i)] + | (h,j) :: rest -> if j == i then ((h+1,j)::rest) else ((1,i)::(h,j)::rest) + ) [] + |> List.rev + |> List.map fst + )) ; + Ezfio.set_basis_prim_coef (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:coef) ; + Ezfio.set_basis_prim_expo (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| prim_num |] ~data:expo) ; - Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list - ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; - Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; - Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; - Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list - ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; - Ezfio.set_ao_basis_ao_cartesian(cart); - in - match Input.Ao_basis.read () with - | None -> failwith "Error in basis" - | Some x -> Input.Ao_basis.write x + Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; + Ezfio.set_ao_basis_ao_nucl(Ezfio.ezfio_array_of_list + ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; + Ezfio.set_ao_basis_ao_power(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; 3 |] ~data:ao_power) ; + Ezfio.set_ao_basis_ao_coef(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_coef) ; + Ezfio.set_ao_basis_ao_expo(Ezfio.ezfio_array_of_list + ~rank:2 ~dim:[| ao_num ; ao_prim_num_max |] ~data:ao_expo) ; + Ezfio.set_ao_basis_ao_cartesian(cart); + in + match Input.Ao_basis.read () with + | None -> failwith "Error in basis" + | Some x -> Input.Ao_basis.write x + end in let () = try write_file () with @@ -781,7 +785,7 @@ If a file with the same name as the basis set exists, this file will be read. O run ?o:output basis au charge dummy multiplicity pseudo cart xyz_filename ) with - | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt +(* | Failure txt -> Printf.eprintf "Fatal error: %s\n%!" txt *) | Command_line.Error txt -> Printf.eprintf "Command line error: %s\n%!" txt diff --git a/scripts/ezfio_interface/qp_edit_template b/scripts/ezfio_interface/qp_edit_template index 4218456d..fe718a50 100644 --- a/scripts/ezfio_interface/qp_edit_template +++ b/scripts/ezfio_interface/qp_edit_template @@ -172,25 +172,23 @@ let run check_only ?ndet ?state ezfio_filename = (* Reorder basis set *) begin - let aos = - match Input.Ao_basis.read() with - | Some x -> x - | _ -> assert false - in - let ordering = Input.Ao_basis.ordering aos in - let test = Array.copy ordering in - Array.sort compare test ; - if test <> ordering then - begin - Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; - let new_aos = Input.Ao_basis.reorder aos in - Input.Ao_basis.write new_aos; - match Input.Mo_basis.read() with - | None -> () - | Some mos -> - let new_mos = Input.Mo_basis.reorder mos ordering in - Input.Mo_basis.write new_mos - end + match Input.Ao_basis.read() with + | Some aos -> + let ordering = Input.Ao_basis.ordering aos in + let test = Array.copy ordering in + Array.sort compare test ; + if test <> ordering then + begin + Printf.eprintf "Warning: Basis set is not properly ordered. Redordering.\n"; + let new_aos = Input.Ao_basis.reorder aos in + Input.Ao_basis.write new_aos; + match Input.Mo_basis.read() with + | None -> () + | Some mos -> + let new_mos = Input.Mo_basis.reorder mos ordering in + Input.Mo_basis.write new_mos + end + | _ -> () end; begin diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index caed4698..4ab080ec 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -22,4 +22,4 @@ ezfio_name: direct type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml -default: True +default: False diff --git a/src/mo_basis/utils.irp.f b/src/mo_basis/utils.irp.f index 12c6c79d..5f664c41 100644 --- a/src/mo_basis/utils.irp.f +++ b/src/mo_basis/utils.irp.f @@ -3,7 +3,6 @@ subroutine save_mos double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(mo_num) call ezfio_set_mo_basis_mo_label(mo_label) call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -27,7 +26,7 @@ subroutine save_mos_no_occ double precision, allocatable :: buffer(:,:) integer :: i,j - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) !call ezfio_set_mo_basis_mo_num(mo_num) !call ezfio_set_mo_basis_mo_label(mo_label) !call ezfio_set_mo_basis_ao_md5(ao_md5) @@ -48,7 +47,7 @@ subroutine save_mos_truncated(n) double precision, allocatable :: buffer(:,:) integer :: i,j,n - call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) +! call system('$QP_ROOT/scripts/save_current_mos.sh '//trim(ezfio_filename)) call ezfio_set_mo_basis_mo_num(n) call ezfio_set_mo_basis_mo_label(mo_label) From f0b71bc2b0ec38dc8d151f7bc9c410b781e28b03 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 10 May 2023 18:06:50 +0200 Subject: [PATCH 084/337] Add libtrexio in configure --- configure | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/configure b/configure index d3377093..4dd753ff 100755 --- a/configure +++ b/configure @@ -9,6 +9,8 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX +TREXIO_VERSION=2.3.1 + # Force GCC instead of ICC for dependencies export CC=gcc @@ -189,7 +191,7 @@ if [[ "${PACKAGES}.x" != ".x" ]] ; then fi if [[ ${PACKAGES} = all ]] ; then - PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats" + PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio" fi @@ -203,6 +205,33 @@ for PACKAGE in ${PACKAGES} ; do mv ninja "\${QP_ROOT}"/bin/ EOF + elif [[ ${PACKAGE} = trexio-nohdf5 ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} --without-hdf5 + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files + tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz + mv ninja "\${QP_ROOT}"/bin/ +EOF + elif [[ ${PACKAGE} = trexio ]] ; then + + VERSION=$TREXIO_VERSION + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz + cd trexio-${VERSION} + ./configure --prefix=\${QP_ROOT} + make -j 8 && make -j 8 check && make -j 8 install + cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files +EOF + elif [[ ${PACKAGE} = gmp ]] ; then @@ -338,6 +367,12 @@ if [[ ${ZEROMQ} = $(not_found) ]] ; then fail fi +TREXIO=$(find_lib -ltrexio) +if [[ ${TREXIO} = $(not_found) ]] ; then + error "TREXIO (trexio,trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" + fail +fi + F77ZMQ=$(find_lib -lzmq -lf77zmq -lpthread) if [[ ${F77ZMQ} = $(not_found) ]] ; then error "Fortran binding of ZeroMQ (f77zmq) is not installed." From 367e1a2d5a8e5dc841576cd1217eaab37a0dd794 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 10 May 2023 22:36:21 +0200 Subject: [PATCH 085/337] fixed bug in -s-Gauss List_b3 for molec --- src/ao_many_one_e_ints/listj1b.irp.f | 3 ++- src/non_h_ints_mu/debug_fit.irp.f | 2 ++ src/non_h_ints_mu/grad_squared.irp.f | 2 ++ src/tc_keywords/EZFIO.cfg | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index c06d64bb..93ac459e 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -336,7 +336,8 @@ END_PROVIDER r2 = dx*dx + dy*dy + dz*dz ii = ii + 1 - List_all_comb_b3_coef( ii) = dexp(-tmp1*tmp2*tmp4*r2) + ! x 2 to avoid doing integrals twice + List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) List_all_comb_b3_expo( ii) = tmp3 List_all_comb_b3_cent(1,ii) = tmp4 * (tmp1 * xi + tmp2 * xj) List_all_comb_b3_cent(2,ii) = tmp4 * (tmp1 * yi + tmp2 * yj) diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index 547fd198..146028d5 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -266,6 +266,8 @@ subroutine test_list_b3() acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) print *, ' analyt = ', i_exc print *, ' numeri = ', i_num print *, ' diff = ', acc_ij diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index debca4ce..3f1a9bf5 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -405,6 +405,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then + print*, " going through Manu's IPP" + ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance ! note that the factor diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index c2d1a048..3a26a6eb 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -222,7 +222,7 @@ default: True [use_ipp] type: logical -doc: If |true|, use Manu IPP for j1b_type=3,4 +doc: If |true|, use Manu IPP interface: ezfio,provider,ocaml default: True From 2c1812e5cad4edcdfea85f2c9f4f2f0dd8271ca3 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 11 May 2023 10:42:12 +0200 Subject: [PATCH 086/337] fixed extrpolation energy in fci_tc_bi --- src/dft_utils_in_r/dm_in_r_routines.irp.f | 2 ++ src/fci_tc_bi/diagonalize_ci.irp.f | 5 ++++- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 22 ---------------------- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/src/dft_utils_in_r/dm_in_r_routines.irp.f b/src/dft_utils_in_r/dm_in_r_routines.irp.f index 9991289c..364b6767 100644 --- a/src/dft_utils_in_r/dm_in_r_routines.irp.f +++ b/src/dft_utils_in_r/dm_in_r_routines.irp.f @@ -140,6 +140,8 @@ end enddo enddo + ! TODO : build the vector of chi_i(r) chi_j(r) and conscequently grad_i(r) grad_j(r) + ! : the same for gamma_ij and big dot product do istate = 1, N_states ! alpha density ! aos_array_bis = \rho_ao * aos_array diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index c8369e93..b6ec073f 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,6 +39,9 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) + E_tc = eigval_right_tc_bi_orth(1) norm = norm_ground_left_right_bi_orth ndet = N_det @@ -50,7 +53,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) enddo enddo SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth - SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef psi_energy psi_s2 call save_tc_bi_ortho_wavefunction end diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a288810b..3140d229 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -233,8 +233,6 @@ end do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) integer :: n_it_max,i_it n_it_max = 1 converged = .False. @@ -300,26 +298,6 @@ end print*,' = ', s2_eigvec_tc_bi_orth(i) enddo - double precision, allocatable :: buffer(:,:) - allocate(buffer(N_det,N_states)) - do k = 1, N_states - do i = 1, N_det - psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) - buffer(i,k) = leigvec_tc_bi_orth(i,k) - enddo - enddo - TOUCH psi_l_coef_bi_ortho - call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) - do k = 1, N_states - do i = 1, N_det - psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) - buffer(i,k) = reigvec_tc_bi_orth(i,k) - enddo - enddo - TOUCH psi_r_coef_bi_ortho - call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) - deallocate(buffer) - END_PROVIDER From 01b70ffb17389485d32069d2f53041998c94763d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:45:18 +0200 Subject: [PATCH 087/337] Removed penalty method from TCSCF: problem with normal ordering --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index ea9cacff..3e89bbe2 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -304,22 +304,23 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Penalty method ! -------------- - if (s2_eig) then - h_p = s_ - do k=1,shift2 - h_p(k,k) = h_p(k,k) - expected_s2 - enddo - if (only_expected_s2) then - alpha = 0.1d0 - h_p = h + alpha*h_p - else - alpha = 0.0001d0 - h_p = h + alpha*h_p - endif - else +! if (s2_eig) then +! h_p = s_ +! do k=1,shift2 +! h_p(k,k) = h_p(k,k) - expected_s2 +! enddo +! if (only_expected_s2) then +! alpha = 0.1d0 +! h_p = h + alpha*h_p +! else +! alpha = 0.0001d0 +! h_p = h + alpha*h_p +! endif +! else h_p = h alpha = 0.d0 - endif +! endif + ! Diagonalize h y = lambda y ! --------------------------- From 49598822938da0ac9fbe9334f6f1d61d18de7f93 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 11 May 2023 22:48:48 +0200 Subject: [PATCH 088/337] Added TREXIO module --- src/trexio/EZFIO.cfg | 54 ++ src/trexio/README.rst | 6 + src/trexio/export_trexio.irp.f | 7 + src/trexio/export_trexio_routines.irp.f | 604 ++++++++++++++++++++ src/trexio/import_trexio_determinants.irp.f | 79 +++ src/trexio/import_trexio_integrals.irp.f | 146 +++++ src/trexio/qp_import_trexio.py | 415 ++++++++++++++ src/trexio/trexio_file.irp.f | 20 + src/trexio/trexio_module.F90 | 1 + 9 files changed, 1332 insertions(+) create mode 100644 src/trexio/EZFIO.cfg create mode 100644 src/trexio/README.rst create mode 100644 src/trexio/export_trexio.irp.f create mode 100644 src/trexio/export_trexio_routines.irp.f create mode 100644 src/trexio/import_trexio_determinants.irp.f create mode 100644 src/trexio/import_trexio_integrals.irp.f create mode 100755 src/trexio/qp_import_trexio.py create mode 100644 src/trexio/trexio_file.irp.f create mode 100644 src/trexio/trexio_module.F90 diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg new file mode 100644 index 00000000..8606e908 --- /dev/null +++ b/src/trexio/EZFIO.cfg @@ -0,0 +1,54 @@ +[backend] +type: integer +doc: Back-end used in TREXIO. 0: HDF5, 1:Text +interface: ezfio, ocaml, provider +default: 0 + +[trexio_file] +type: character*(256) +doc: Name of the exported TREXIO file +interface: ezfio, ocaml, provider +default: None + +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + +[export_ao_one_e_ints] +type: logical +doc: If True, export one-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints] +type: logical +doc: If True, export two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_ao_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in AO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints] +type: logical +doc: If True, export two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + +[export_mo_two_e_ints_cholesky] +type: logical +doc: If True, export Cholesky-decomposed two-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/README.rst b/src/trexio/README.rst new file mode 100644 index 00000000..7d7304c6 --- /dev/null +++ b/src/trexio/README.rst @@ -0,0 +1,6 @@ +====== +trexio +====== + +Module for handling TREXIO files. +See https://github.com/trex-coe/trexio diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f new file mode 100644 index 00000000..3ae0dcb4 --- /dev/null +++ b/src/trexio/export_trexio.irp.f @@ -0,0 +1,7 @@ +program export_trexio_prog + implicit none + read_wf = .True. + SOFT_TOUCH read_wf + call export_trexio +end + diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f new file mode 100644 index 00000000..d69e7a70 --- /dev/null +++ b/src/trexio/export_trexio_routines.irp.f @@ -0,0 +1,604 @@ +subroutine export_trexio + use trexio + implicit none + BEGIN_DOC + ! Exports the wave function in TREXIO format + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + double precision, allocatable :: factor(:) + + print *, 'TREXIO file : '//trim(trexio_filename) + print *, '' + + call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') + if (backend == 0) then + f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) + endif + if (f == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + call ezfio_set_trexio_trexio_file(trexio_filename) + +! ------------------------------------------------------------------------------ + +! Electrons +! --------- + + print *, 'Electrons' + + rc = trexio_write_electron_up_num(f, elec_alpha_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_electron_dn_num(f, elec_beta_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Nuclei +! ------ + + print *, 'Nuclei' + + rc = trexio_write_nucleus_num(f, nucl_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_charge(f, nucl_charge) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_label(f, nucl_label, 32) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Pseudo-potentials +! ----------------- + + if (do_pseudo) then + + print *, 'ECP' + integer :: num + + num = 0 + do k=1,pseudo_klocmax + do i=1,nucl_num + if (pseudo_dz_k(i,k) /= 0.d0) then + num = num+1 + end if + end do + end do + + do l=0,pseudo_lmax + do k=1,pseudo_kmax + do i=1,nucl_num + if (pseudo_dz_kl(i,k,l) /= 0.d0) then + num = num+1 + end if + end do + end do + end do + + integer, allocatable :: ang_mom(:), nucleus_index(:), power(:), lmax(:) + double precision, allocatable :: exponent(:), coefficient(:) + + allocate(ang_mom(num), nucleus_index(num), exponent(num), coefficient(num), power(num), & + lmax(nucl_num) ) + + do i=1,nucl_num + lmax(i) = -1 + do l=0,pseudo_lmax + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + lmax(i) = max(lmax(i), l) + end if + end do + end do + end do + + j = 0 + do i=1,nucl_num + do k=1,pseudo_klocmax + if (pseudo_dz_k_transp(k,i) /= 0.d0) then + j = j+1 + ang_mom(j) = lmax(i)+1 + nucleus_index(j) = i + exponent(j) = pseudo_dz_k_transp(k,i) + coefficient(j) = pseudo_v_k_transp(k,i) + power(j) = pseudo_n_k_transp(k,i) + end if + end do + + do l=0,lmax(i) + do k=1,pseudo_kmax + if (pseudo_dz_kl_transp(k,l,i) /= 0.d0) then + j = j+1 + ang_mom(j) = l + nucleus_index(j) = i + exponent(j) = pseudo_dz_kl_transp(k,l,i) + coefficient(j) = pseudo_v_kl_transp(k,l,i) + power(j) = pseudo_n_kl_transp(k,l,i) + end if + end do + end do + end do + + + lmax(:) = lmax(:)+1 + rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_num(f, num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_ang_mom(f, ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_exponent(f, exponent) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_coefficient(f, coefficient) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ecp_power(f, power) + call trexio_assert(rc, TREXIO_SUCCESS) + + endif + + +! Basis +! ----- + + print *, 'Basis' + + + rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_prim_num(f, prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_num(f, shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(shell_num)) + if (ao_normalized) then + factor(1:shell_num) = shell_normalization_factor(1:shell_num) + else + factor(1:shell_num) = 1.d0 + endif + rc = trexio_write_basis_shell_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + + deallocate(factor) + + rc = trexio_write_basis_shell_index(f, shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_exponent(f, prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_basis_coefficient(f, prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + + +! Atomic orbitals +! --------------- + + print *, 'AOs' + + rc = trexio_write_ao_num(f, ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_cartesian(f, 1) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_shell(f, ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) + + integer :: i, pow0(3), powA(3), j, k, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 + + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f, factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) + +! One-e AO integrals +! ------------------ + + if (export_ao_one_e_ints) then + print *, 'AO one-e integrals' + + rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e AO integrals +! ------------------ + + if (export_ao_two_e_ints) then + print *, 'AO two-e integrals' + PROVIDE ao_two_e_integrals_in_map + + integer(8), parameter :: BUFSIZE=100000_8 + double precision :: eri_buffer(BUFSIZE), integral + integer(4) :: eri_index(4,BUFSIZE) + integer(8) :: icount, offset + + double precision, external :: get_ao_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,ao_num + do k=1,ao_num + do j=l,ao_num + do i=k,ao_num + if (i==j .and. k= 0_8) then + rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e AO integrals - Cholesky +! ----------------------------- + + integer(4) :: chol_index(3,BUFSIZE) + double precision :: chol_buffer(BUFSIZE) + + if (export_ao_two_e_ints_cholesky) then + print *, 'AO two-e integrals Cholesky' + + rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,ao_num + do i=1,ao_num + integral = cholesky_ao(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + + +! Molecular orbitals +! ------------------ + + print *, 'MOs' + + rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_num(f, mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_coefficient(f, mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + +! One-e MO integrals +! ------------------ + + if (export_mo_one_e_ints) then + print *, 'MO one-e integrals' + + rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + call trexio_assert(rc, TREXIO_SUCCESS) + + if (do_pseudo) then + rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + +! Two-e MO integrals +! ------------------ + + if (export_mo_two_e_ints) then + print *, 'MO two-e integrals' + PROVIDE mo_two_e_integrals_in_map + + double precision, external :: mo_two_e_integral + + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=l,mo_num + do i=k,mo_num + if (i==j .and. k 0_8) then + rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + +! Two-e MO integrals - Cholesky +! ----------------------------- + + if (export_mo_two_e_ints_cholesky) then + print *, 'MO two-e integrals Cholesky' + + rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) + + icount = 0_8 + offset = 0_8 + do k=1,cholesky_ao_num + do j=1,mo_num + do i=1,mo_num + integral = cholesky_mo(i,j,k) + if (integral == 0.d0) cycle + icount += 1_8 + chol_buffer(icount) = integral + chol_index(1,icount) = i + chol_index(2,icount) = j + chol_index(3,icount) = k + if (icount == BUFSIZE) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + + if (icount > 0_8) then + rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! One-e RDM +! --------- + + rc = trexio_write_rdm_1e(f,one_e_dm_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) + call trexio_assert(rc, TREXIO_SUCCESS) + + +! Two-e RDM +! --------- + + if (export_rdm) then + PROVIDE two_e_dm_mo + print *, 'Two-e RDM' + + icount = 0_8 + offset = 0_8 + do l=1,mo_num + do k=1,mo_num + do j=1,mo_num + do i=1,mo_num + integral = two_e_dm_mo(i,j,k,l) + if (integral == 0.d0) cycle + icount += 1_8 + eri_buffer(icount) = integral + eri_index(1,icount) = i + eri_index(2,icount) = j + eri_index(3,icount) = k + eri_index(4,icount) = l + if (icount == BUFSIZE) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + offset += icount + icount = 0_8 + end if + end do + end do + end do + end do + + if (icount >= 0_8) then + rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + end if + end if + + +! ------------------------------------------------------------------------------ + + ! Determinants + ! ------------ + + integer*8, allocatable :: det_buffer(:,:,:) + double precision, allocatable :: coef_buffer(:,:) + integer :: nint + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) + + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f, n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_set_state (f, 0) + call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,n_det + icount += 1_8 + det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) + coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) + if (icount == BUFSIZE) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + offset += icount + icount = 0_8 + end if + end do + + if (icount >= 0_8) then + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + do i=1,N_states + rc = trexio_set_state (f, i-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) + end do + rc = trexio_set_state (f, 0) + end if + + deallocate ( det_buffer, coef_buffer ) + + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) + +end + + +! -*- mode: f90 -*- diff --git a/src/trexio/import_trexio_determinants.irp.f b/src/trexio/import_trexio_determinants.irp.f new file mode 100644 index 00000000..1759bb94 --- /dev/null +++ b/src/trexio/import_trexio_determinants.irp.f @@ -0,0 +1,79 @@ +program import_determinants_ao + call run +end + +subroutine run + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import determinants from TREXIO + END_DOC + + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer :: m + + double precision, allocatable :: coef_buffer(:,:) + integer*8 , allocatable :: det_buffer(:,:,:) + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + + + ! Determinants + ! ------------ + + integer :: nint, nstates + integer :: bufsize + + rc = trexio_read_state_num(f, nstates) + call trexio_assert(rc, TREXIO_SUCCESS) + +! rc = trexio_read_determinant_int64_num(f, nint) +! call trexio_assert(rc, TREXIO_SUCCESS) + nint = N_int + if (nint /= N_int) then + stop 'Problem with N_int' + endif + + integer*8 :: offset, icount + + rc = trexio_read_determinant_num(f, bufsize) + call trexio_assert(rc, TREXIO_SUCCESS) + print *, 'N_det = ', bufsize + + allocate ( det_buffer(nint, 2, bufsize), coef_buffer(bufsize, n_states) ) + + + offset = 0_8 + icount = bufsize + + rc = trexio_read_determinant_list(f, offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det: ', bufsize, icount + stop -1 + endif + + do m=1,nstates + rc = trexio_set_state(f, m-1) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_read_determinant_coefficient(f, offset, icount, coef_buffer(1,m)) + call trexio_assert(rc, TREXIO_SUCCESS) + if (icount /= bufsize) then + print *, 'error: bufsize /= N_det for state', m, ':', icount, bufsize + stop -1 + endif + enddo + + call save_wavefunction_general(bufsize,nstates,det_buffer,size(coef_buffer,1),coef_buffer) + + +end diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f new file mode 100644 index 00000000..9f9ad9d6 --- /dev/null +++ b/src/trexio/import_trexio_integrals.irp.f @@ -0,0 +1,146 @@ +program import_integrals_ao + use trexio + implicit none + integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) + if (f == 0_8) then + print *, 'Unable to open TREXIO file for reading' + print *, 'rc = ', rc + stop -1 + endif + + call run(f) + rc = trexio_close(f) + call trexio_assert(rc, TREXIO_SUCCESS) +end + +subroutine run(f) + use trexio + use map_module + implicit none + BEGIN_DOC +! Program to import integrals from TREXIO + END_DOC + + integer(trexio_t), intent(in) :: f ! TREXIO file handle + integer(trexio_exit_code) :: rc + + integer ::i,j,k,l + integer(8) :: m, n_integrals + double precision :: integral + + integer(key_kind), allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_values(:) + + + double precision, allocatable :: A(:,:) + double precision, allocatable :: V(:) + integer , allocatable :: Vi(:,:) + double precision :: s + + if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then + rc = trexio_read_nucleus_repulsion(f, s) + call trexio_assert(rc, TREXIO_SUCCESS) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here, rc + print *, 'Error reading nuclear repulsion' + stop -1 + endif + call ezfio_set_nuclei_nuclear_repulsion(s) + call ezfio_set_nuclei_io_nuclear_repulsion('Read') + endif + + ! AO integrals + ! ------------ + + allocate(A(ao_num, ao_num)) + + + if (trexio_has_ao_1e_int_overlap(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_overlap(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO overlap' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_overlap('Read') + endif + + if (trexio_has_ao_1e_int_kinetic(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_kinetic(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO kinetic integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_kinetic('Read') + endif + +! if (trexio_has_ao_1e_int_ecp(f) == TREXIO_SUCCESS) then +! rc = trexio_read_ao_1e_int_ecp(f, A) +! if (rc /= TREXIO_SUCCESS) then +! print *, irp_here +! print *, 'Error reading AO ECP local integrals' +! stop -1 +! endif +! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) +! call ezfio_set_ao_one_e_ints_io_ao_integrals_pseudo('Read') +! endif + + if (trexio_has_ao_1e_int_potential_n_e(f) == TREXIO_SUCCESS) then + rc = trexio_read_ao_1e_int_potential_n_e(f, A) + if (rc /= TREXIO_SUCCESS) then + print *, irp_here + print *, 'Error reading AO potential N-e integrals' + stop -1 + endif + call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) + call ezfio_set_ao_one_e_ints_io_ao_integrals_n_e('Read') + endif + + deallocate(A) + + ! AO 2e integrals + ! --------------- + PROVIDE ao_integrals_map + + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + +end diff --git a/src/trexio/qp_import_trexio.py b/src/trexio/qp_import_trexio.py new file mode 100755 index 00000000..de8d1269 --- /dev/null +++ b/src/trexio/qp_import_trexio.py @@ -0,0 +1,415 @@ +#!/usr/bin/env python3 +""" +convert TREXIO file to EZFIO + +Usage: + qp_import_trexio [-o EZFIO_DIR] FILE + +Options: + -o --output=EZFIO_DIR Produced directory + by default is FILE.ezfio + +""" + +import sys +import os +import trexio +import numpy as np +from functools import reduce +from ezfio import ezfio +from docopt import docopt + + +try: + QP_ROOT = os.environ["QP_ROOT"] + QP_EZFIO = os.environ["QP_EZFIO"] +except KeyError: + print("Error: QP_ROOT environment variable not found.") + sys.exit(1) +else: + sys.path = [QP_EZFIO + "/Python", + QP_ROOT + "/install/resultsFile", + QP_ROOT + "/install", + QP_ROOT + "/scripts"] + sys.path + + +def generate_xyz(l): + + def create_z(x,y,z): + return (x, y, l-(x+y)) + + def create_y(accu,x,y,z): + if y == 0: + result = [create_z(x,y,z)] + accu + else: + result = create_y([create_z(x,y,z)] + accu , x, y-1, z) + return result + + def create_x(accu,x,y,z): + if x == 0: + result = create_y([], x,y,z) + accu + else: + xnew = x-1 + ynew = l-xnew + result = create_x(create_y([],x,y,z) + accu , xnew, ynew, z) + return result + + result = create_x([], l, 0, 0) + result.reverse() + return result + + + +def write_ezfio(trexio_filename, filename): + + try: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_TEXT) + except: + trexio_file = trexio.File(trexio_filename,mode='r',back_end=trexio.TREXIO_HDF5) + + ezfio.set_file(filename) + ezfio.set_trexio_trexio_file(trexio_filename) + + print("Nuclei\t\t...\t", end=' ') + + charge = [0.] + if trexio.has_nucleus(trexio_file): + charge = trexio.read_nucleus_charge(trexio_file) + ezfio.set_nuclei_nucl_num(len(charge)) + ezfio.set_nuclei_nucl_charge(charge) + + coord = trexio.read_nucleus_coord(trexio_file) + coord = np.transpose(coord) + ezfio.set_nuclei_nucl_coord(coord) + + label = trexio.read_nucleus_label(trexio_file) + nucl_num = trexio.read_nucleus_num(trexio_file) + + # Transformt H1 into H + import re + p = re.compile(r'(\d*)$') + label = [p.sub("", x).capitalize() for x in label] + ezfio.set_nuclei_nucl_label(label) + + else: + ezfio.set_nuclei_nucl_num(1) + ezfio.set_nuclei_nucl_charge([0.]) + ezfio.set_nuclei_nucl_coord([0.,0.,0.]) + ezfio.set_nuclei_nucl_label(["X"]) + + print("OK") + + + print("Electrons\t...\t", end=' ') + + try: + num_beta = trexio.read_electron_dn_num(trexio_file) + except: + num_beta = sum(charge)//2 + + try: + num_alpha = trexio.read_electron_up_num(trexio_file) + except: + num_alpha = sum(charge) - num_beta + + if num_alpha == 0: + print("\n\nError: There are zero electrons in the TREXIO file.\n\n") + sys.exit(1) + ezfio.set_electrons_elec_alpha_num(num_alpha) + ezfio.set_electrons_elec_beta_num(num_beta) + + print("OK") + + print("Basis\t\t...\t", end=' ') + + shell_num = 0 + try: + basis_type = trexio.read_basis_type(trexio_file) + + if basis_type.lower() not in ["gaussian", "slater"]: + raise TypeError + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) + + print("OK") + except: + print("None") + ezfio.set_ao_basis_ao_cartesian(True) + + print("AOS\t\t...\t", end=' ') + + try: + cartesian = trexio.read_ao_cartesian(trexio_file) + except: + cartesian = True + + if not cartesian: + raise TypeError('Only cartesian TREXIO files can be converted') + + ao_num = trexio.read_ao_num(trexio_file) + ezfio.set_ao_basis_ao_num(ao_num) + + if shell_num > 0: + ao_shell = trexio.read_ao_shell(trexio_file) + at = [ nucl_index[i]+1 for i in ao_shell ] + ezfio.set_ao_basis_ao_nucl(at) + + num_prim0 = [ 0 for i in range(shell_num) ] + for i in shell_index: + num_prim0[i] += 1 + + coef = {} + expo = {} + for i,c in enumerate(coefficient): + idx = shell_index[i] + if idx in coef: + coef[idx].append(c) + expo[idx].append(exponent[i]) + else: + coef[idx] = [c] + expo[idx] = [exponent[i]] + + coefficient = [] + exponent = [] + power_x = [] + power_y = [] + power_z = [] + num_prim = [] + + for i in range(shell_num): + for x,y,z in generate_xyz(ang_mom[i]): + power_x.append(x) + power_y.append(y) + power_z.append(z) + coefficient.append(coef[i]) + exponent.append(expo[i]) + num_prim.append(num_prim0[i]) + + assert (len(coefficient) == ao_num) + ezfio.set_ao_basis_ao_power(power_x + power_y + power_z) + ezfio.set_ao_basis_ao_prim_num(num_prim) + + prim_num_max = max( [ len(x) for x in coefficient ] ) + + for i in range(ao_num): + coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)] + exponent [i] += [0. for j in range(len(exponent[i]), prim_num_max)] + + coefficient = reduce(lambda x, y: x + y, coefficient, []) + exponent = reduce(lambda x, y: x + y, exponent , []) + + coef = [] + expo = [] + for i in range(prim_num_max): + for j in range(i, len(coefficient), prim_num_max): + coef.append(coefficient[j]) + expo.append(exponent[j]) + +# ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) + ezfio.set_ao_basis_ao_coef(coef) + ezfio.set_ao_basis_ao_expo(expo) + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + + print("OK") + + + # _ + # |\/| _ _ |_) _. _ o _ + # | | (_) _> |_) (_| _> | _> + # + + print("MOS\t\t...\t", end=' ') + + labels = { "Canonical" : "Canonical", + "RHF" : "Canonical", + "BOYS" : "Localized", + "ROHF" : "Canonical", + "UHF" : "Canonical", + "Natural": "Natural" } + try: + label = labels[trexio.read_mo_type(trexio_file)] + except: + label = "None" + ezfio.set_mo_basis_mo_label(label) + + try: + clss = trexio.read_mo_class(trexio_file) + core = [ i for i in clss if i.lower() == "core" ] + inactive = [ i for i in clss if i.lower() == "inactive" ] + active = [ i for i in clss if i.lower() == "active" ] + virtual = [ i for i in clss if i.lower() == "virtual" ] + deleted = [ i for i in clss if i.lower() == "deleted" ] + except trexio.Error: + pass + + try: + mo_num = trexio.read_mo_num(trexio_file) + ezfio.set_mo_basis_mo_num(mo_num) + + MoMatrix = trexio.read_mo_coefficient(trexio_file) + ezfio.set_mo_basis_mo_coef(MoMatrix) + + mo_occ = [ 0. for i in range(mo_num) ] + for i in range(num_alpha): + mo_occ[i] += 1. + for i in range(num_beta): + mo_occ[i] += 1. + ezfio.set_mo_basis_mo_occ(mo_occ) + except: + pass + + print("OK") + + + print("Pseudos\t\t...\t", end=' ') + + ezfio.set_pseudo_do_pseudo(False) + + if trexio.has_ecp_ang_mom(trexio_file): + ezfio.set_pseudo_do_pseudo(True) + max_ang_mom_plus_1 = trexio.read_ecp_max_ang_mom_plus_1(trexio_file) + z_core = trexio.read_ecp_z_core(trexio_file) + ang_mom = trexio.read_ecp_ang_mom(trexio_file) + nucleus_index = trexio.read_ecp_nucleus_index(trexio_file) + exponent = trexio.read_ecp_exponent(trexio_file) + coefficient = trexio.read_ecp_coefficient(trexio_file) + power = trexio.read_ecp_power(trexio_file) + + lmax = max( max_ang_mom_plus_1 ) - 1 + ezfio.set_pseudo_pseudo_lmax(lmax) + ezfio.set_pseudo_nucl_charge_remove(z_core) + + prev_center = None + ecp = {} + for i in range(len(ang_mom)): + center = nucleus_index[i] + if center != prev_center: + ecp[center] = { "lmax": max_ang_mom_plus_1[center], + "zcore": z_core[center], + "contr": {} } + for j in range(max_ang_mom_plus_1[center]+1): + ecp[center]["contr"][j] = [] + + ecp[center]["contr"][ang_mom[i]].append( (coefficient[i], power[i], exponent[i]) ) + prev_center = center + + ecp_loc = {} + ecp_nl = {} + kmax = 0 + klocmax = 0 + for center in ecp: + ecp_nl [center] = {} + for k in ecp[center]["contr"]: + if k == ecp[center]["lmax"]: + ecp_loc[center] = ecp[center]["contr"][k] + klocmax = max(len(ecp_loc[center]), klocmax) + else: + ecp_nl [center][k] = ecp[center]["contr"][k] + kmax = max(len(ecp_nl [center][k]), kmax) + + ezfio.set_pseudo_pseudo_klocmax(klocmax) + ezfio.set_pseudo_pseudo_kmax(kmax) + + pseudo_n_k = [[0 for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_v_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_dz_k = [[0. for _ in range(nucl_num)] for _ in range(klocmax)] + pseudo_n_kl = [[[0 for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_v_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + pseudo_dz_kl = [[[0. for _ in range(nucl_num)] for _ in range(kmax)] for _ in range(lmax+1)] + for center in ecp_loc: + for k in range( len(ecp_loc[center]) ): + v, n, dz = ecp_loc[center][k] + pseudo_n_k[k][center] = n + pseudo_v_k[k][center] = v + pseudo_dz_k[k][center] = dz + + ezfio.set_pseudo_pseudo_n_k(pseudo_n_k) + ezfio.set_pseudo_pseudo_v_k(pseudo_v_k) + ezfio.set_pseudo_pseudo_dz_k(pseudo_dz_k) + + for center in ecp_nl: + for l in range( len(ecp_nl[center]) ): + for k in range( len(ecp_nl[center][l]) ): + v, n, dz = ecp_nl[center][l][k] + pseudo_n_kl[l][k][center] = n + pseudo_v_kl[l][k][center] = v + pseudo_dz_kl[l][k][center] = dz + + ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) + ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) + ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + + + print("OK") + + + + +def get_full_path(file_path): + file_path = os.path.expanduser(file_path) + file_path = os.path.expandvars(file_path) + return file_path + + +if __name__ == '__main__': + ARGUMENTS = docopt(__doc__) + + FILE = get_full_path(ARGUMENTS['FILE']) + trexio_filename = FILE + + if ARGUMENTS["--output"]: + EZFIO_FILE = get_full_path(ARGUMENTS["--output"]) + else: + EZFIO_FILE = "{0}.ezfio".format(FILE) + + write_ezfio(trexio_filename, EZFIO_FILE) + sys.stdout.flush() + diff --git a/src/trexio/trexio_file.irp.f b/src/trexio/trexio_file.irp.f new file mode 100644 index 00000000..c9897748 --- /dev/null +++ b/src/trexio/trexio_file.irp.f @@ -0,0 +1,20 @@ +BEGIN_PROVIDER [ character*(1024), trexio_filename ] + implicit none + BEGIN_DOC + ! Name of the TREXIO file + END_DOC + character*(1024) :: prefix + + trexio_filename = trexio_file + + if (trexio_file == 'None') then + prefix = trim(ezfio_work_dir)//trim(ezfio_filename) + if (backend == 0) then + trexio_filename = trim(prefix)//'.h5' + else if (backend == 1) then + trexio_filename = trim(prefix) + endif + endif +END_PROVIDER + + diff --git a/src/trexio/trexio_module.F90 b/src/trexio/trexio_module.F90 new file mode 100644 index 00000000..acd08492 --- /dev/null +++ b/src/trexio/trexio_module.F90 @@ -0,0 +1 @@ +#include "trexio_f.f90" From a2627e79255f3b490beca663ae1f1464e76ab96d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:04:45 +0200 Subject: [PATCH 089/337] Introduced TREXIO in QP --- configure | 2 +- scripts/compilation/qp_create_ninja | 2 +- src/trexio/export_trexio_routines.irp.f | 223 +++++++++++++----------- src/two_body_rdm/two_e_dm_mo.irp.f | 1 - 4 files changed, 123 insertions(+), 105 deletions(-) diff --git a/configure b/configure index 4dd753ff..66bc9419 100755 --- a/configure +++ b/configure @@ -9,7 +9,7 @@ echo "QP_ROOT="$QP_ROOT unset CC unset CCXX -TREXIO_VERSION=2.3.1 +TREXIO_VERSION=2.3.2 # Force GCC instead of ICC for dependencies export CC=gcc diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 27b34901..606fd0f6 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -38,7 +38,7 @@ def comp_path(path): from qp_path import QP_ROOT, QP_SRC, QP_EZFIO -LIB = " -lz" +LIB = " -lz -ltrexio" EZFIO_LIB = join("$QP_ROOT", "lib", "libezfio_irp.a") ZMQ_LIB = join("$QP_ROOT", "lib", "libf77zmq.a") + " " + join("$QP_ROOT", "lib", "libzmq.a") + " -lstdc++ -lrt -ldl" ROOT_BUILD_NINJA = join("$QP_ROOT", "config", "build.ninja") diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index d69e7a70..c55ddc5e 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -5,24 +5,35 @@ subroutine export_trexio ! Exports the wave function in TREXIO format END_DOC - integer(trexio_t) :: f ! TREXIO file handle + integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc + integer :: k double precision, allocatable :: factor(:) + character*(256) :: filenames(N_states) - print *, 'TREXIO file : '//trim(trexio_filename) + filenames(1) = trexio_filename + do k=2,N_states + write(filenames(k),'(A,I3.3)') trim(trexio_filename)//'.', k-1 + enddo + + do k=1,N_states + print *, 'TREXIO file : ', trim(filenames(k)) + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + enddo print *, '' - call system('cp '//trim(trexio_filename)//' '//trim(trexio_filename)//'.bak') - if (backend == 0) then - f = trexio_open(trexio_filename, 'u', TREXIO_HDF5, rc) - else if (backend == 1) then - f = trexio_open(trexio_filename, 'u', TREXIO_TEXT, rc) - endif - if (f == 0_8) then - print *, 'Unable to open TREXIO file for writing' - print *, 'rc = ', rc - stop -1 - endif + do k=1,N_states + if (backend == 0) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + else if (backend == 1) then + f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + endif + if (f(k) == 0_8) then + print *, 'Unable to open TREXIO file for writing' + print *, 'rc = ', rc + stop -1 + endif + enddo call ezfio_set_trexio_trexio_file(trexio_filename) ! ------------------------------------------------------------------------------ @@ -32,10 +43,10 @@ subroutine export_trexio print *, 'Electrons' - rc = trexio_write_electron_up_num(f, elec_alpha_num) + rc = trexio_write_electron_up_num(f(1), elec_alpha_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_electron_dn_num(f, elec_beta_num) + rc = trexio_write_electron_dn_num(f(1), elec_beta_num) call trexio_assert(rc, TREXIO_SUCCESS) @@ -44,19 +55,19 @@ subroutine export_trexio print *, 'Nuclei' - rc = trexio_write_nucleus_num(f, nucl_num) + rc = trexio_write_nucleus_num(f(1), nucl_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_charge(f, nucl_charge) + rc = trexio_write_nucleus_charge(f(1), nucl_charge) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_coord(f, nucl_coord_transp) + rc = trexio_write_nucleus_coord(f(1), nucl_coord_transp) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_label(f, nucl_label, 32) + rc = trexio_write_nucleus_label(f(1), nucl_label, 32) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_nucleus_repulsion(f, nuclear_repulsion) + rc = trexio_write_nucleus_repulsion(f(1), nuclear_repulsion) call trexio_assert(rc, TREXIO_SUCCESS) @@ -133,28 +144,28 @@ subroutine export_trexio lmax(:) = lmax(:)+1 - rc = trexio_write_ecp_max_ang_mom_plus_1(f, lmax) + rc = trexio_write_ecp_max_ang_mom_plus_1(f(1), lmax) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_z_core(f, int(nucl_charge_remove)) + rc = trexio_write_ecp_z_core(f(1), int(nucl_charge_remove)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_num(f, num) + rc = trexio_write_ecp_num(f(1), num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_ang_mom(f, ang_mom) + rc = trexio_write_ecp_ang_mom(f(1), ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_nucleus_index(f, nucleus_index) + rc = trexio_write_ecp_nucleus_index(f(1), nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_exponent(f, exponent) + rc = trexio_write_ecp_exponent(f(1), exponent) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_coefficient(f, coefficient) + rc = trexio_write_ecp_coefficient(f(1), coefficient) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ecp_power(f, power) + rc = trexio_write_ecp_power(f(1), power) call trexio_assert(rc, TREXIO_SUCCESS) endif @@ -166,19 +177,19 @@ subroutine export_trexio print *, 'Basis' - rc = trexio_write_basis_type(f, 'Gaussian', len('Gaussian')) + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f, prim_num) + rc = trexio_write_basis_prim_num(f(1), prim_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f, shell_num) + rc = trexio_write_basis_shell_num(f(1), shell_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f, basis_nucleus_index) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f, shell_ang_mom) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(shell_num)) @@ -187,18 +198,18 @@ subroutine export_trexio else factor(1:shell_num) = 1.d0 endif - rc = trexio_write_basis_shell_factor(f, factor) + rc = trexio_write_basis_shell_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) - rc = trexio_write_basis_shell_index(f, shell_index) + rc = trexio_write_basis_shell_index(f(1), shell_index) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f, prim_expo) + rc = trexio_write_basis_exponent(f(1), prim_expo) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f, prim_coef) + rc = trexio_write_basis_coefficient(f(1), prim_coef) call trexio_assert(rc, TREXIO_SUCCESS) allocate(factor(prim_num)) @@ -207,7 +218,7 @@ subroutine export_trexio else factor(1:prim_num) = 1.d0 endif - rc = trexio_write_basis_prim_factor(f, factor) + rc = trexio_write_basis_prim_factor(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -217,16 +228,16 @@ subroutine export_trexio print *, 'AOs' - rc = trexio_write_ao_num(f, ao_num) + rc = trexio_write_ao_num(f(1), ao_num) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f, 1) + rc = trexio_write_ao_cartesian(f(1), 1) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f, ao_shell) + rc = trexio_write_ao_shell(f(1), ao_shell) call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, k, l, nz + integer :: i, pow0(3), powA(3), j, l, nz double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c nz=100 @@ -243,7 +254,7 @@ subroutine export_trexio else factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f, factor) + rc = trexio_write_ao_normalization(f(1), factor) call trexio_assert(rc, TREXIO_SUCCESS) deallocate(factor) @@ -253,21 +264,21 @@ subroutine export_trexio if (export_ao_one_e_ints) then print *, 'AO one-e integrals' - rc = trexio_write_ao_1e_int_overlap(f,ao_overlap) + rc = trexio_write_ao_1e_int_overlap(f(1),ao_overlap) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_kinetic(f,ao_kinetic_integrals) + rc = trexio_write_ao_1e_int_kinetic(f(1),ao_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_1e_int_potential_n_e(f,ao_integrals_n_e) + rc = trexio_write_ao_1e_int_potential_n_e(f(1),ao_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_ao_1e_int_ecp(f, ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) + rc = trexio_write_ao_1e_int_ecp(f(1), ao_pseudo_integrals_local + ao_pseudo_integrals_non_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_ao_1e_int_core_hamiltonian(f,ao_one_e_integrals) + rc = trexio_write_ao_1e_int_core_hamiltonian(f(1),ao_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -303,7 +314,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -314,7 +325,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_ao_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_ao_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -328,7 +339,7 @@ subroutine export_trexio if (export_ao_two_e_ints_cholesky) then print *, 'AO two-e integrals Cholesky' - rc = trexio_write_ao_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_ao_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -344,7 +355,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -354,7 +365,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_ao_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_ao_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -366,22 +377,24 @@ subroutine export_trexio print *, 'MOs' - rc = trexio_write_mo_type(f, mo_label, len(trim(mo_label))) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_num(f, mo_num) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_write_mo_coefficient(f, mo_coef) + rc = trexio_write_mo_coefficient(f(1), mo_coef) call trexio_assert(rc, TREXIO_SUCCESS) if ( (trim(mo_label) == 'Canonical').and. & (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f, fock_matrix_diag_mo) + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f, mo_class, len(mo_class(1))) + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) ! One-e MO integrals @@ -390,18 +403,18 @@ subroutine export_trexio if (export_mo_one_e_ints) then print *, 'MO one-e integrals' - rc = trexio_write_mo_1e_int_kinetic(f,mo_kinetic_integrals) + rc = trexio_write_mo_1e_int_kinetic(f(1),mo_kinetic_integrals) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_mo_1e_int_potential_n_e(f,mo_integrals_n_e) + rc = trexio_write_mo_1e_int_potential_n_e(f(1),mo_integrals_n_e) call trexio_assert(rc, TREXIO_SUCCESS) if (do_pseudo) then - rc = trexio_write_mo_1e_int_ecp(f,mo_pseudo_integrals_local) + rc = trexio_write_mo_1e_int_ecp(f(1),mo_pseudo_integrals_local) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_1e_int_core_hamiltonian(f,mo_one_e_integrals) + rc = trexio_write_mo_1e_int_core_hamiltonian(f(1),mo_one_e_integrals) call trexio_assert(rc, TREXIO_SUCCESS) end if @@ -432,7 +445,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -443,7 +456,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_mo_2e_int_eri(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -454,7 +467,7 @@ subroutine export_trexio if (export_mo_two_e_ints_cholesky) then print *, 'MO two-e integrals Cholesky' - rc = trexio_write_mo_2e_int_eri_cholesky_num(f, cholesky_ao_num) + rc = trexio_write_mo_2e_int_eri_cholesky_num(f(1), cholesky_ao_num) call trexio_assert(rc, TREXIO_SUCCESS) icount = 0_8 @@ -470,7 +483,7 @@ subroutine export_trexio chol_index(2,icount) = j chol_index(3,icount) = k if (icount == BUFSIZE) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -480,7 +493,7 @@ subroutine export_trexio end do if (icount > 0_8) then - rc = trexio_write_mo_2e_int_eri_cholesky(f, offset, icount, chol_index, chol_buffer) + rc = trexio_write_mo_2e_int_eri_cholesky(f(1), offset, icount, chol_index, chol_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -489,14 +502,16 @@ subroutine export_trexio ! One-e RDM ! --------- - rc = trexio_write_rdm_1e(f,one_e_dm_mo) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_rdm_1e(f(k),one_e_dm_mo_alpha(:,:,k) + one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_up(f,one_e_dm_mo_alpha_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_up(f(k),one_e_dm_mo_alpha(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_rdm_1e_dn(f,one_e_dm_mo_beta_average) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_rdm_1e_dn(f(k),one_e_dm_mo_beta(:,:,k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo ! Two-e RDM @@ -521,7 +536,7 @@ subroutine export_trexio eri_index(3,icount) = k eri_index(4,icount) = l if (icount == BUFSIZE) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) offset += icount icount = 0_8 @@ -532,7 +547,7 @@ subroutine export_trexio end do if (icount >= 0_8) then - rc = trexio_write_rdm_2e(f, offset, icount, eri_index, eri_buffer) + rc = trexio_write_rdm_2e(f(1), offset, icount, eri_index, eri_buffer) call trexio_assert(rc, TREXIO_SUCCESS) end if end if @@ -547,56 +562,60 @@ subroutine export_trexio double precision, allocatable :: coef_buffer(:,:) integer :: nint -! rc = trexio_read_determinant_int64_num(f, nint) -! call trexio_assert(rc, TREXIO_SUCCESS) - nint = N_int + rc = trexio_get_int64_num(f(1), nint) + call trexio_assert(rc, TREXIO_SUCCESS) +! nint = N_int if (nint /= N_int) then stop 'Problem with N_int' endif allocate ( det_buffer(nint, 2, BUFSIZE), coef_buffer(BUFSIZE, n_states) ) - icount = 0_8 - offset = 0_8 - rc = trexio_write_state_num(f, n_states) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1, N_states + icount = 0_8 + offset = 0_8 + rc = trexio_write_state_num(f(k), n_states) + call trexio_assert(rc, TREXIO_SUCCESS) + +! Will need to be updated with TREXIO 2.4 +! rc = trexio_write_state_id(f(k), k-1) + rc = trexio_write_state_id(f(k), k) + call trexio_assert(rc, TREXIO_SUCCESS) + + rc = trexio_write_state_file_name(f(k), filenames, len(filenames(1))) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - rc = trexio_set_state (f, 0) - call trexio_assert(rc, TREXIO_SUCCESS) do k=1,n_det icount += 1_8 det_buffer(1:nint, 1:2, icount) = psi_det(1:N_int, 1:2, k) coef_buffer(icount,1:N_states) = psi_coef(k,1:N_states) if (icount == BUFSIZE) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) do i=1,N_states - rc = trexio_set_state (f, i-1) + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) end do - rc = trexio_set_state (f, 0) offset += icount icount = 0_8 end if end do if (icount >= 0_8) then - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_list(f, offset, icount, det_buffer) - call trexio_assert(rc, TREXIO_SUCCESS) - do i=1,N_states - rc = trexio_set_state (f, i-1) - call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_determinant_coefficient(f, offset, icount, coef_buffer(1,i)) - end do - rc = trexio_set_state (f, 0) + do i=1,N_states + rc = trexio_write_determinant_list(f(i), offset, icount, det_buffer) + call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_determinant_coefficient(f(i), offset, icount, coef_buffer(1,i)) + call trexio_assert(rc, TREXIO_SUCCESS) + end do end if deallocate ( det_buffer, coef_buffer ) - rc = trexio_close(f) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_close(f(k)) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo end diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 6bd115a2..99be1f54 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -30,7 +30,6 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) -! * 2.d0 END_PROVIDER From 3aae1dbf77f20ef9a2e46adb6462beabca8ab8ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:06:07 +0200 Subject: [PATCH 090/337] fix completion in qp set_file --- etc/qp.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/qp.rc b/etc/qp.rc index d339f475..9eec4570 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) From 0fa576f90930bcf0ffc7a933a42d5667071ad3cd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 16:51:17 +0200 Subject: [PATCH 091/337] Accelerated (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 244 ++++++++++++++++++++++---------- 1 file changed, 172 insertions(+), 72 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 1f1db87e..24b86972 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -8,15 +8,15 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) integer :: i,j,k,a,b,c - + allocate(W(nO,nO,nO,nV,nV,nV)) allocate(V(nO,nO,nO,nV,nV,nV)) - call form_w(nO,nV,t2,W) + call form_w(nO,nV,t2,W) call form_v(nO,nV,t1,W,V) energy = 0d0 @@ -33,9 +33,9 @@ subroutine ccsd_par_t_space(nO,nV,t1,t2,energy) enddo enddo enddo - + energy = energy / 3d0 - + deallocate(V,W) end @@ -46,7 +46,7 @@ subroutine form_w(nO,nV,t2,W) integer, intent(in) :: nO,nV double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: W(nO, nO, nO, nV, nV, nV) - + integer :: i,j,k,l,a,b,c,d W = 0d0 @@ -133,7 +133,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - + double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) double precision, allocatable :: W_ijk(:,:,:), V_ijk(:,:,:) @@ -141,7 +141,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_ijk - + !allocate(W(nV,nV,nV,nO,nO,nO)) !allocate(V(nV,nV,nV,nO,nO,nO)) allocate(W_ijk(nV,nV,nV), V_ijk(nV,nV,nV)) @@ -154,10 +154,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) - + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) - + !$OMP DO collapse(3) do i = 1, nO do a = 1, nV @@ -181,7 +181,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & @@ -208,10 +208,10 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo !$OMP END DO nowait - + !v_vvoo(b,c,j,k) * t1(i,a) & !X_vvoo(b,c,k,j) * T1_vo(a,i) & - + !$OMP DO collapse(3) do j = 1, nO do k = 1, nO @@ -267,7 +267,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) call wall_time(tb) write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo - + energy = energy / 3d0 deallocate(W_ijk,V_ijk,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) @@ -285,78 +285,178 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) double precision, intent(out) :: W(nV,nV,nV)!,nO,nO,nO) - + integer :: l,a,b,c,d + double precision, allocatable, dimension(:,:,:) :: X, Y, Z !W = 0d0 !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & - !$OMP PRIVATE(a,b,c,d,l) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) - do c = 1, nV - do b = 1, nV - do a = 1, nV - W(a,b,c) = 0d0 + allocate(X(nV,nV,nV)) + allocate(Y(nV,nV,nV)) + allocate(Z(nV,nV,nV)) - do d = 1, nV - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (bd|ai) - ! phys - !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj - !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik - !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij - !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj - !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik - enddo - + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do d = 1, nV + Z(d,a,b) = X_vvvo(d,b,a,i) enddo enddo enddo - !$OMP END DO nowait + !$OMP END PARALLEL DO - !$OMP DO collapse(2) + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,k,j), nV, 0.d0, W, nV*nV) + + !$OMP PARALLEL DO do c = 1, nV - do b = 1, nV - do a = 1, nV - - do l = 1, nO - !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & - W(a,b,c) = W(a,b,c) & - ! chem (ck|jl) - ! phys - !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & - !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj - !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik - !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij - !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj - !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik - - X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & - - X_ovoo(l,b,k,j) * T_ovvo(l,a,c,i) & ! bc kj - - X_ovoo(l,b,i,j) * T_ovvo(l,c,a,k) & ! prev ac ik - - X_ovoo(l,a,j,i) * T_ovvo(l,c,b,k) & ! prev ab ij - - X_ovoo(l,a,k,i) * T_ovvo(l,b,c,j) & ! prev bc kj - - X_ovoo(l,c,i,k) * T_ovvo(l,b,a,j) ! prev ac ik - enddo - + do a = 1, nV + do d = 1, nV + Z(d,a,c) = X_vvvo(d,c,a,i) enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + Z, nV, T_vvoo(1,1,j,k), nV, 0.d0, Y, nV*nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,k), nV, T_vvoo(1,1,j,i), nV, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,j), nV, X_vvvo(1,1,1,k), nV, 1.d0, W, nV) + + call dgemm('T','N',nV,nV*nV,nV, 1.d0, & + T_vvoo(1,1,i,k), nV, X_vvvo(1,1,1,j), nV, 1.d0, Y, nV) + + call dgemm('T','N',nV*nV,nV,nV, 1.d0, & + X_vvvo(1,1,1,j), nV, T_vvoo(1,1,k,i), nV, 1.d0, W, nV*nV) + + deallocate(Z) + + + allocate(Z(nO,nV,nV)) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,j,k), nO, 1.d0, W, nV*nV) + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + T_ovvo(1,1,1,i), nO, X_ovoo(1,1,k,j), nO, 1.d0, Y, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,c) = T_ovvo(l,c,a,k) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,j), nO, 1.d0, Y, nV*nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,j,i), nO, T_ovvo(1,1,1,k), nO, 1.d0, Y, nV) + + call dgemm('T','N',nV,nV*nV,nO, -1.d0, & + X_ovoo(1,1,k,i), nO, T_ovvo(1,1,1,j), nO, 1.d0, W, nV) + + !$OMP PARALLEL DO + do b = 1, nV + do a = 1, nV + do l = 1, nO + Z(l,a,b) = T_ovvo(l,b,a,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N',nV*nV,nV,nO, -1.d0, & + Z, nO, X_ovoo(1,1,i,k), nO, 1.d0, W, nV*nV) + + !$OMP PARALLEL DO + do c = 1, nV + do b = 1, nV + do a = 1, nV + W(a,b,c) = W(a,b,c) + Y(a,c,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(X,Y,Z) + + +! !$OMP PARALLEL & +! !$OMP SHARED(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) & +! !$OMP PRIVATE(a,b,c,d,l) & +! !$OMP DEFAULT(NONE) +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! W(a,b,c) = 0.d0 +! +! do d = 1, nV +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (bd|ai) +! ! phys +! !+ cc_space_v_vvvo(b,a,d,i) * t2(k,j,c,d) & +! !+ cc_space_v_vvvo(c,a,d,i) * t2(j,k,b,d) & ! bc kj +! !+ cc_space_v_vvvo(a,c,d,k) * t2(j,i,b,d) & ! prev ac ik +! !+ cc_space_v_vvvo(b,c,d,k) * t2(i,j,a,d) & ! prev ab ij +! !+ cc_space_v_vvvo(c,b,d,j) * t2(i,k,a,d) & ! prev bc kj +! !+ cc_space_v_vvvo(a,b,d,j) * t2(k,i,c,d) ! prev ac ik +! + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & +! + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & ! bc kj +! + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & ! prev ac ik +! + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & ! prev ab ij +! + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & ! prev bc kj +! + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO nowait +! +! !$OMP DO collapse(2) +! do c = 1, nV +! do b = 1, nV +! do a = 1, nV +! +! do l = 1, nO +! !W(i,j,k,a,b,c) = W(i,j,k,a,b,c) & +! W(a,b,c) = W(a,b,c) & +! ! chem (ck|jl) +! ! phys +! !- cc_space_v_vooo(c,j,k,l) * t2(i,l,a,b) & +! !- cc_space_v_vooo(b,k,j,l) * t2(i,l,a,c) & ! bc kj +! !- cc_space_v_vooo(b,i,j,l) * t2(k,l,c,a) & ! prev ac ik +! !- cc_space_v_vooo(a,j,i,l) * t2(k,l,c,b) & ! prev ab ij +! !- cc_space_v_vooo(a,k,i,l) * t2(j,l,b,c) & ! prev bc kj +! !- cc_space_v_vooo(c,i,k,l) * t2(j,l,b,a) ! prev ac ik +! - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & +! - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj +! - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik +! - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij +! - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj +! - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik +! enddo +! +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + ! enddo ! enddo !enddo @@ -382,7 +482,7 @@ implicit none !do i = 1, nO ! do j = 1, nO ! do k = 1, nO - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,i,j,k,T_vo,X_vvoo,W,V) & !$OMP PRIVATE(a,b,c) & @@ -404,7 +504,7 @@ implicit none enddo !$OMP END DO !$OMP END PARALLEL - + ! enddo ! enddo !enddo From 69a76c6dba05188e1856c0e195ada4daa25984f5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:17:07 +0200 Subject: [PATCH 092/337] Added W_abc routines for (T) --- src/ccsd/ccsd_t_space_orb.irp.f | 5 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 252 ++++++++++++++++++++++++++++ 2 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 src/ccsd/ccsd_t_space_orb_abc.irp.f diff --git a/src/ccsd/ccsd_t_space_orb.irp.f b/src/ccsd/ccsd_t_space_orb.irp.f index 24b86972..37f2b484 100644 --- a/src/ccsd/ccsd_t_space_orb.irp.f +++ b/src/ccsd/ccsd_t_space_orb.irp.f @@ -257,7 +257,7 @@ subroutine ccsd_par_t_space_v2(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL @@ -426,7 +426,7 @@ subroutine form_w_ijk(nO,nV,i,j,k,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W) ! enddo ! enddo ! !$OMP END DO nowait -! +! ! !$OMP DO collapse(2) ! do c = 1, nV ! do b = 1, nV @@ -510,3 +510,4 @@ implicit none !enddo end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f new file mode 100644 index 00000000..3b762a06 --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -0,0 +1,252 @@ +! Main + +subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) + double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) + double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb, delta, delta_abc + + !allocate(W(nV,nV,nV,nO,nO,nO)) + !allocate(V(nV,nV,nV,nO,nO,nO)) + allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) + allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) + allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) + allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + + ! Temporary arrays + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + + !$OMP DO collapse(3) + do i = 1, nO + do a = 1, nV + do b = 1, nV + do d = 1, nV + X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do d = 1, nV + T_vvoo(d,c,k,j) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do c = 1, nV + do l = 1, nO + X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(3) + do i = 1, nO + do b = 1, nV + do a = 1, nV + do l = 1, nO + T_ovvo(l,a,b,i) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vvoo(b,c,j,k) * t1(i,a) & + !X_vvoo(b,c,k,j) * T1_vo(a,i) & + + !$OMP DO collapse(3) + do j = 1, nO + do k = 1, nO + do c = 1, nV + do b = 1, nV + X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO collapse(1) + do i = 1, nO + do a = 1, nV + T_vo(a,i) = t1(i,a) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(ta) + energy = 0d0 + do c = 1, nV + do b = 1, nV + do a = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) + call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) + call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + + call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + !$OMP PARALLEL & + !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& + !$OMP PRIVATE(i,j,k,e,delta) & + !$OMP DEFAULT(NONE) + e = 0d0 + !$OMP DO + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) + e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL + !$OMP END PARALLEL + enddo + enddo + call wall_time(tb) + write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' + enddo + + energy = energy / 3d0 + + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + !deallocate(V,W) +end + + +subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) + + implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) + double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: W_abc(nO,nO,nO) + + integer :: l,i,j,k,d + + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP PRIVATE(i,j,k,d,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do k = 1, nO + do j = 1, nO + do i = 1, nO + W_abc(i,j,k) = 0.d0 + + do d = 1, nV + W_abc(i,j,k) = W_abc(i,j,k) & + + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & + + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & + + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & + + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & + + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & + + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + enddo + + do l = 1, nO + W_abc(i,j,k) = W_abc(i,j,k) & + - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & + - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj + - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik + - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij + - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj + - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + enddo + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + +end + + +! V_abc + +subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) + +implicit none + + integer, intent(in) :: nO,nV,a,b,c + !double precision, intent(in) :: t1(nO,nV) + double precision, intent(in) :: T_vo(nV,nO) + double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: W(nO,nO,nO) + double precision, intent(out) :: V(nO,nO,nO) + + integer :: i,j,k + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP PRIVATE(i,j,k) & + !$OMP DEFAULT(NONE) + !$OMP DO collapse(2) + do k = 1, nO + do j = 1, nO + do i = 1, nO + !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & + V(i,j,k) = W(i,j,k) & + + X_vvoo(b,c,k,j) * T_vo(a,i) & + + X_vvoo(a,c,k,i) * T_vo(b,j) & + + X_vvoo(a,b,j,i) * T_vo(c,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + From 85ca8854188289b34010eb431813919df5507aed Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:20:08 +0200 Subject: [PATCH 093/337] Fixing github actions --- .github/workflows/compilation.yml | 1 + .github/workflows/configuration.yml | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 42710ce5..85daf7db 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -48,6 +48,7 @@ jobs: ./configure -i docopt || : ./configure -i resultsFile || : ./configure -i bats || : + ./configure -i trexio-nohdf5 || : ./configure -c ./config/gfortran_debug.cfg - name: Compilation run: | diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index 14019e5d..ba37f5dd 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 - name: zlib run: | ./configure -i zlib || echo OK @@ -50,6 +50,12 @@ jobs: - name: bats run: | ./configure -i bats || echo OK + - name: trexio-nohdf5 + run: | + ./configure -i trexio-nohdf5 || echo OK + - name: trexio + run: | + ./configure -i trexio || echo OK - name: Final check run: | ./configure -c config/gfortran_debug.cfg From 6d1cf74e09c10ecd2ba19f9952f2f466a7ad4874 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 19:58:08 +0200 Subject: [PATCH 094/337] Added NEED in trexio --- src/trexio/NEED | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/trexio/NEED diff --git a/src/trexio/NEED b/src/trexio/NEED new file mode 100644 index 00000000..625463a2 --- /dev/null +++ b/src/trexio/NEED @@ -0,0 +1,8 @@ +ezfio_files +determinants +mo_one_e_ints +mo_two_e_ints +ao_two_e_ints +ao_one_e_ints +two_body_rdm +hartree_fock From b8804f058a2872976af4712248609fab5bf6edaf Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 12 May 2023 21:38:01 +0200 Subject: [PATCH 095/337] Moved qp_import_trexio.py --- {src/trexio => scripts}/qp_import_trexio.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) rename {src/trexio => scripts}/qp_import_trexio.py (98%) diff --git a/src/trexio/qp_import_trexio.py b/scripts/qp_import_trexio.py similarity index 98% rename from src/trexio/qp_import_trexio.py rename to scripts/qp_import_trexio.py index de8d1269..d8a19160 100755 --- a/src/trexio/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -13,12 +13,17 @@ Options: import sys import os -import trexio import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +try: + import trexio +except ImportError: + print("Error: trexio python module is not found. Try python3 -m pip install trexio") + sys.exit(1) + try: QP_ROOT = os.environ["QP_ROOT"] From a45fe53a9c61dce28602ed086b1d518be00e05aa Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 09:15:34 +0200 Subject: [PATCH 096/337] compilation on lcpq is fine --- src/cosgtos_ao_int/NEED | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/cosgtos_ao_int/NEED diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED new file mode 100644 index 00000000..932f88a3 --- /dev/null +++ b/src/cosgtos_ao_int/NEED @@ -0,0 +1,2 @@ +ezfio_files +ao_basis From 8cefe6eb441ee16e2710064ed38d11779dbc7d26 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 12:18:36 +0200 Subject: [PATCH 097/337] save tc coef --- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 ++++++++++++++++++++ src/tc_scf/rotate_tcscf_orbitals.irp.f | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 3140d229..a83d6cd0 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -298,6 +298,26 @@ end print*,' = ', s2_eigvec_tc_bi_orth(i) enddo + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + do k = 1, N_states + do i = 1, N_det + psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_l_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + do k = 1, N_states + do i = 1, N_det + psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_r_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + deallocate(buffer) + END_PROVIDER diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index 31999c18..2567faf0 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -17,7 +17,8 @@ program rotate_tcscf_orbitals bi_ortho = .True. touch bi_ortho - call maximize_overlap() + call minimize_tc_orb_angles() + !call maximize_overlap() end From 6289508c1e4e1ae7abce6388cf42fa12b5d28752 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 13:32:52 +0200 Subject: [PATCH 098/337] Swapped indices in CCSD(T) --- scripts/qp_import_trexio.py | 23 +++--- src/ccsd/ccsd_space_orb_sub.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 108 +++++++++++++--------------- src/utils/linear_algebra.irp.f | 30 ++++---- 4 files changed, 79 insertions(+), 84 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index d8a19160..eb19e16b 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -95,14 +95,15 @@ def write_ezfio(trexio_filename, filename): p = re.compile(r'(\d*)$') label = [p.sub("", x).capitalize() for x in label] ezfio.set_nuclei_nucl_label(label) + print("OK") else: ezfio.set_nuclei_nucl_num(1) ezfio.set_nuclei_nucl_charge([0.]) ezfio.set_nuclei_nucl_coord([0.,0.,0.]) ezfio.set_nuclei_nucl_label(["X"]) + print("None") - print("OK") print("Electrons\t...\t", end=' ') @@ -110,12 +111,12 @@ def write_ezfio(trexio_filename, filename): try: num_beta = trexio.read_electron_dn_num(trexio_file) except: - num_beta = sum(charge)//2 + num_beta = int(sum(charge))//2 try: num_alpha = trexio.read_electron_up_num(trexio_file) except: - num_alpha = sum(charge) - num_beta + num_alpha = int(sum(charge)) - num_beta if num_alpha == 0: print("\n\nError: There are zero electrons in the TREXIO file.\n\n") @@ -123,7 +124,7 @@ def write_ezfio(trexio_filename, filename): ezfio.set_electrons_elec_alpha_num(num_alpha) ezfio.set_electrons_elec_beta_num(num_beta) - print("OK") + print(f"{num_alpha} {num_beta}") print("Basis\t\t...\t", end=' ') @@ -263,7 +264,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_ao_basis_ao_expo(expo) ezfio.set_ao_basis_ao_basis("Read from TREXIO") - print("OK") + print("OK") + + else: + print("None") # _ @@ -308,10 +312,10 @@ def write_ezfio(trexio_filename, filename): for i in range(num_beta): mo_occ[i] += 1. ezfio.set_mo_basis_mo_occ(mo_occ) + print("OK") except: - pass + print("None") - print("OK") print("Pseudos\t\t...\t", end=' ') @@ -391,9 +395,10 @@ def write_ezfio(trexio_filename, filename): ezfio.set_pseudo_pseudo_n_kl(pseudo_n_kl) ezfio.set_pseudo_pseudo_v_kl(pseudo_v_kl) ezfio.set_pseudo_pseudo_dz_kl(pseudo_dz_kl) + print("OK") - - print("OK") + else: + print("None") diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index b63375cf..acd14034 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -169,7 +169,7 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v2(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & + call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) call wall_time(tb) print*,'Time: ',tb-ta, ' s' diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 3b762a06..acc2aaa9 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -15,8 +15,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) - double precision, allocatable :: X_vvvo(:,:,:,:), X_ovoo(:,:,:,:), X_vvoo(:,:,:,:) - double precision, allocatable :: T_vvoo(:,:,:,:), T_ovvo(:,:,:,:), T_vo(:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_abc @@ -24,25 +24,25 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !allocate(V(nV,nV,nV,nO,nO,nO)) allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) - allocate(X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO), X_vvoo(nV,nV,nO,nO)) - allocate(T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO), T_vo(nV,nO)) + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) ! Temporary arrays !$OMP PARALLEL & - !$OMP SHARED(nO,nV,T_vvoo,T_ovvo,T_vo,X_vvvo,X_ovoo,X_vvoo, & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & !$OMP PRIVATE(a,b,c,d,i,j,k,l) & !$OMP DEFAULT(NONE) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & - !X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) !$OMP DO collapse(3) do i = 1, nO do a = 1, nV do b = 1, nV do d = 1, nV - X_vvvo(d,b,a,i) = v_vvvo(b,a,d,i) + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo enddo enddo @@ -54,7 +54,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do k = 1, nO do c = 1, nV do d = 1, nV - T_vvoo(d,c,k,j) = t2(k,j,c,d) + T_voov(d,k,j,c) = t2(k,j,c,d) enddo enddo enddo @@ -62,14 +62,14 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END DO nowait !v_vooo(c,j,k,l) * t2(i,l,a,b) & - !X_ovoo(l,c,j,k) * T_ovvo(l,a,b,i) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do c = 1, nV + do c = 1, nV + do k = 1, nO + do j = 1, nO do l = 1, nO - X_ovoo(l,c,j,k) = v_vooo(c,j,k,l) + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) enddo enddo enddo @@ -81,35 +81,27 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do b = 1, nV do a = 1, nV do l = 1, nO - T_ovvo(l,a,b,i) = t2(i,l,a,b) + T_oovv(l,i,a,b) = t2(i,l,a,b) enddo enddo enddo enddo !$OMP END DO nowait - !v_vvoo(b,c,j,k) * t1(i,a) & - !X_vvoo(b,c,k,j) * T1_vo(a,i) & + !X_oovv(j,k,b,c) * T1_vo(a,i) & !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV - do b = 1, nV - X_vvoo(b,c,k,j) = v_vvoo(b,c,j,k) + do c = 1, nV + do b = 1, nV + do j = 1, nO + do k = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo enddo enddo enddo !$OMP END DO nowait - !$OMP DO collapse(1) - do i = 1, nO - do a = 1, nV - T_vo(a,i) = t1(i,a) - enddo - enddo - !$OMP END DO !$OMP END PARALLEL call wall_time(ta) @@ -118,13 +110,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do b = 1, nV do a = 1, nV delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) - call form_w_abc(nO,nV,b,c,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_bca) - call form_w_abc(nO,nV,c,a,b,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cab) - call form_w_abc(nO,nV,c,b,a,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_cba) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) + call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) + call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) + call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,T_vo,X_vvoo,W_cba,V_cba) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc) + call form_v_abc(nO,nV,c,b,a,t1,X_oovv,W_cba,V_cba) !$OMP PARALLEL & !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& !$OMP PRIVATE(i,j,k,e,delta) & @@ -154,26 +146,26 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = energy / 3d0 - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vvvo,X_ovoo,T_vvoo,T_ovvo,T_vo) + deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vovv,X_ooov,T_voov,T_oovv) !deallocate(V,W) end -subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) implicit none integer, intent(in) :: nO,nV,a,b,c !double precision, intent(in) :: t2(nO,nO,nV,nV) - double precision, intent(in) :: T_vvoo(nV,nV,nO,nO), T_ovvo(nO,nV,nV,nO) - double precision, intent(in) :: X_vvvo(nV,nV,nV,nO), X_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) & + !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & !$OMP PRIVATE(i,j,k,d,l) & !$OMP DEFAULT(NONE) @@ -185,23 +177,23 @@ subroutine form_w_abc(nO,nV,a,b,c,T_vvoo,T_ovvo,X_vvvo,X_ovoo,W_abc) do d = 1, nV W_abc(i,j,k) = W_abc(i,j,k) & - + X_vvvo(d,b,a,i) * T_vvoo(d,c,k,j) & - + X_vvvo(d,c,a,i) * T_vvoo(d,b,j,k) & - + X_vvvo(d,a,c,k) * T_vvoo(d,b,j,i) & - + X_vvvo(d,b,c,k) * T_vvoo(d,a,i,j) & - + X_vvvo(d,c,b,j) * T_vvoo(d,a,i,k) & - + X_vvvo(d,a,b,j) * T_vvoo(d,c,k,i) + + X_vovv(d,i,b,a) * T_voov(d,k,j,c) & + + X_vovv(d,i,c,a) * T_voov(d,j,k,b) & + + X_vovv(d,k,a,c) * T_voov(d,j,i,b) & + + X_vovv(d,k,b,c) * T_voov(d,i,j,a) & + + X_vovv(d,j,c,b) * T_voov(d,i,k,a) & + + X_vovv(d,j,a,b) * T_voov(d,k,i,c) enddo do l = 1, nO W_abc(i,j,k) = W_abc(i,j,k) & - - T_ovvo(l,a,b,i) * X_ovoo(l,c,j,k) & - - T_ovvo(l,a,c,i) * X_ovoo(l,b,k,j) & ! bc kj - - T_ovvo(l,c,a,k) * X_ovoo(l,b,i,j) & ! prev ac ik - - T_ovvo(l,c,b,k) * X_ovoo(l,a,j,i) & ! prev ab ij - - T_ovvo(l,b,c,j) * X_ovoo(l,a,k,i) & ! prev bc kj - - T_ovvo(l,b,a,j) * X_ovoo(l,c,i,k) ! prev ac ik + - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) & + - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) & ! bc kj + - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) & ! prev ac ik + - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) & ! prev ab ij + - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) & ! prev bc kj + - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) ! prev ac ik enddo enddo @@ -216,21 +208,21 @@ end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_vo,X_vvoo,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W,V) implicit none integer, intent(in) :: nO,nV,a,b,c !double precision, intent(in) :: t1(nO,nV) - double precision, intent(in) :: T_vo(nV,nO) - double precision, intent(in) :: X_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: T_ov(nO,nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) double precision, intent(in) :: W(nO,nO,nO) double precision, intent(out) :: V(nO,nO,nO) integer :: i,j,k !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_vo,X_vvoo,W,V) & + !$OMP SHARED(nO,nV,a,b,c,T_ov,X_oovv,W,V) & !$OMP PRIVATE(i,j,k) & !$OMP DEFAULT(NONE) !$OMP DO collapse(2) @@ -239,9 +231,9 @@ implicit none do i = 1, nO !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & V(i,j,k) = W(i,j,k) & - + X_vvoo(b,c,k,j) * T_vo(a,i) & - + X_vvoo(a,c,k,i) * T_vo(b,j) & - + X_vvoo(a,b,j,i) * T_vo(c,k) + + X_oovv(j,k,b,c) * T_ov(i,a) & + + X_oovv(i,k,a,c) * T_ov(j,b) & + + X_oovv(i,j,a,b) * T_ov(k,c) enddo enddo enddo diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 3b43d607..69873bc0 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1823,41 +1823,39 @@ subroutine pivoted_cholesky( A, rank, tol, ndim, U) ! U is allocated inside this subroutine ! rank is the number of Cholesky vectors depending on tol ! -integer :: ndim -integer, intent(inout) :: rank -double precision, dimension(ndim, ndim), intent(inout) :: A -double precision, dimension(ndim, rank), intent(out) :: U -double precision, intent(in) :: tol +integer :: ndim +integer, intent(inout) :: rank +double precision, intent(inout) :: A(ndim, ndim) +double precision, intent(out) :: U(ndim, rank) +double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work character, parameter :: uplo = "U" -integer :: N, LDA +integer :: LDA integer :: info integer :: k, l, rank0 -external :: dpstrf rank0 = rank -N = size(A, dim=1) -LDA = N -allocate(piv(N)) -allocate(work(2*N)) -call dpstrf(uplo, N, A, LDA, piv, rank, tol, work, info) +LDA = ndim +allocate(piv(ndim)) +allocate(work(2*ndim)) +call dpstrf(uplo, ndim, A, LDA, piv, rank, tol, work, info) if (rank > rank0) then print *, 'Bug: rank > rank0 in pivoted cholesky. Increase rank before calling' stop end if -do k = 1, N - A(k+1:, k) = 0.00D+0 +do k = 1, ndim + A(k+1:ndim, k) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 -do k = 1, N +do k = 1, ndim l = piv(k) - U(l, :) = A(1:rank, k) + U(l, 1:rank) = A(1:rank, k) end do end subroutine pivoted_cholesky From ca5857ac3630a452199bb25b29eed04e8674e6b3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 15:34:16 +0200 Subject: [PATCH 099/337] Added dgemm in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 92 +++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 13 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index acc2aaa9..e960d47d 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -162,7 +162,86 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d + double precision, allocatable, dimension(:,:,:) :: W_ikj, X + allocate(W_ikj(nO,nO,nO)) + allocate(X(nV,nO,nO)) + + W_abc = 0.d0 + W_ikj = 0.d0 + +! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + +! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k) = T_voov(d,k,i,c) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) + +! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do j=1,nO + do i=1,nO + do d=1,nV + X(d,i,j) = T_voov(d,j,i,b) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + +! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + do k=1,nO + do j=1,nO + do d=1,nV + X(d,j,k) = T_voov(d,k,j,c) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,b,a), nV, X(1,1,1), nV, 1.d0, W_abc, nO) + + + +! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj +! - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) : k ij +! - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) : k ji +! - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) : j ki +! - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) : j ik + + !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j,k) + do k=1,nO + do j=1,nO + do i=1,nO + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO !$OMP PARALLEL & !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & @@ -173,18 +252,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) do k = 1, nO do j = 1, nO do i = 1, nO - W_abc(i,j,k) = 0.d0 - - do d = 1, nV - W_abc(i,j,k) = W_abc(i,j,k) & - + X_vovv(d,i,b,a) * T_voov(d,k,j,c) & - + X_vovv(d,i,c,a) * T_voov(d,j,k,b) & - + X_vovv(d,k,a,c) * T_voov(d,j,i,b) & - + X_vovv(d,k,b,c) * T_voov(d,i,j,a) & - + X_vovv(d,j,c,b) * T_voov(d,i,k,a) & - + X_vovv(d,j,a,b) * T_voov(d,k,i,c) - - enddo do l = 1, nO W_abc(i,j,k) = W_abc(i,j,k) & @@ -202,7 +269,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) !$OMP END DO !$OMP END PARALLEL - end From 1c0141d9a2be1b8025c76a178c81559b63432121 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 21:25:49 +0200 Subject: [PATCH 100/337] Full dgemm in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 140 ++++++++++++---------------- 1 file changed, 62 insertions(+), 78 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index e960d47d..c5c15fb3 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -162,78 +162,97 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) double precision, intent(out) :: W_abc(nO,nO,nO) integer :: l,i,j,k,d - double precision, allocatable, dimension(:,:,:) :: W_ikj, X + double precision, allocatable, dimension(:,:,:) :: W_ikj + double precision, allocatable :: X(:,:,:,:) allocate(W_ikj(nO,nO,nO)) - allocate(X(nV,nO,nO)) + allocate(X(nV,nO,nO,2)) - W_abc = 0.d0 - W_ikj = 0.d0 + do k=1,nO + do i=1,nO + do d=1,nV + X(d,i,k,1) = T_voov(d,k,i,c) +! X(d,i,j,2) = T_voov(d,j,i,b) + X(d,i,k,2) = T_voov(d,k,i,b) +! X(d,j,k,1) = T_voov(d,k,j,c) + enddo + enddo + enddo ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) -! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) - do k=1,nO - do i=1,nO - do d=1,nV - X(d,i,k) = T_voov(d,k,i,c) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) - ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) - do j=1,nO - do i=1,nO - do d=1,nV - X(d,i,j) = T_voov(d,j,i,b) - enddo - enddo - enddo - !$OMP END PARALLEL DO call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + +! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj + + call dgemm('T','N', nO, nO*nO, nV, 1.d0, & + X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + +! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j + + call dgemm('T','N', nO*nO, nO, nV, 1.d0, & + X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j call dgemm('T','N', nO*nO, nO, nV, 1.d0, & T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) -! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,k,d) + deallocate(X) + + allocate(X(nO,nO,nO,2)) + do k=1,nO do j=1,nO - do d=1,nV - X(d,j,k) = T_voov(d,k,j,c) + do l=1,nO + X(l,j,k,1) = X_ooov(l,k,j,b) +! X(l,i,j,2) = X_ooov(l,j,i,a) + X(l,j,k,2) = X_ooov(l,k,j,a) +! X(l,i,k,2) = X_ooov(l,k,i,a) enddo enddo enddo - !$OMP END PARALLEL DO - - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,b,a), nV, X(1,1,1), nV, 1.d0, W_abc, nO) - ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk -! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj -! - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) : k ij -! - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) : k ji -! - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) : j ki -! - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) : j ik + call dgemm('T','N', nO, nO*nO, nO, -1.d0, & + T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + +! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj + + call dgemm('T','N', nO, nO*nO, nO, -1.d0, & + T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + +! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + +! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj, nO*nO) + +! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j + + call dgemm('T','N', nO*nO, nO, nO, -1.d0, & + X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj, nO*nO) - !$OMP PARALLEL DO COLLAPSE(2) PRIVATE(i,j,k) do k=1,nO do j=1,nO do i=1,nO @@ -241,33 +260,6 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) enddo enddo enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) & - !$OMP PRIVATE(i,j,k,d,l) & - !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do k = 1, nO - do j = 1, nO - do i = 1, nO - - do l = 1, nO - W_abc(i,j,k) = W_abc(i,j,k) & - - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) & - - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) & ! bc kj - - T_oovv(l,k,c,a) * X_ooov(l,i,j,b) & ! prev ac ik - - T_oovv(l,k,c,b) * X_ooov(l,j,i,a) & ! prev ab ij - - T_oovv(l,j,b,c) * X_ooov(l,k,i,a) & ! prev bc kj - - T_oovv(l,j,b,a) * X_ooov(l,i,k,c) ! prev ac ik - enddo - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL end @@ -287,15 +279,9 @@ implicit none integer :: i,j,k - !$OMP PARALLEL & - !$OMP SHARED(nO,nV,a,b,c,T_ov,X_oovv,W,V) & - !$OMP PRIVATE(i,j,k) & - !$OMP DEFAULT(NONE) - !$OMP DO collapse(2) do k = 1, nO do j = 1, nO do i = 1, nO - !V(i,j,k,a,b,c) = V(i,j,k,a,b,c) + W(i,j,k,a,b,c) & V(i,j,k) = W(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & @@ -303,8 +289,6 @@ implicit none enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL end From 19f2ede59c95cee5e34a49d960b894c5765805fe Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 13 May 2023 21:43:01 +0200 Subject: [PATCH 101/337] check TC energy after rotations --- src/tc_scf/routines_rotates.irp.f | 31 ++++++++++++++++++++++++++++++- src/tc_scf/tc_scf.irp.f | 2 +- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 8c1071b2..4ac66b5f 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -93,14 +93,22 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) integer :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst double precision :: max_angle, norm + double precision :: E_old, E_new, E_thr integer, allocatable :: list_degen(:,:) double precision, allocatable :: new_angles(:) + double precision, allocatable :: mo_r_coef_old(:,:), mo_l_coef_old(:,:) double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:) double precision, allocatable :: mo_r_coef_new(:,:) - double precision, allocatable :: fock_diag(:),s_mat(:,:) + double precision, allocatable :: fock_diag(:), s_mat(:,:) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) + E_thr = 1d-8 + E_old = TC_HF_energy + allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) + mo_r_coef_old = mo_r_coef + mo_l_coef_old = mo_l_coef + good_angles = .False. allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) @@ -253,11 +261,32 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef + ! check if TC energy has changed + E_new = TC_HF_energy + if(dabs(E_new - E_old) .gt. E_thr) then + mo_r_coef = mo_r_coef_old + mo_l_coef = mo_l_coef_old + deallocate(mo_l_coef_old, mo_r_coef_old) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + print*, ' TC energy bef rotation = ', E_old + print*, ' TC energy aft rotation = ', E_new + print*, ' the rotation is refused' + stop + endif + allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) good_angles = max_angle.lt.45.d0 print *, ' max_angle = ', max_angle + deallocate(new_angles) + + + deallocate(mo_l_coef_old, mo_r_coef_old) + deallocate(mo_l_coef_good, mo_r_coef_good) + deallocate(mo_r_coef_new) end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 2485ee8b..88ddd26c 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -53,7 +53,7 @@ program tc_scf stop endif - !call minimize_tc_orb_angles() + call minimize_tc_orb_angles() endif From 2ff4e61c9e283890d5c1819c034b788487f08405 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 21:48:04 +0200 Subject: [PATCH 102/337] Better parallelism in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 61 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index c5c15fb3..8b6db915 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -14,19 +14,17 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: V(:,:,:,:,:,:) double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:), V_cba(:,:,:) + double precision, allocatable :: W_bca(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d double precision :: e,ta,tb, delta, delta_abc - !allocate(W(nV,nV,nV,nO,nO,nO)) - !allocate(V(nV,nV,nV,nO,nO,nO)) - allocate(W_abc(nO,nO,nO), V_abc(nO,nO,nO), W_cab(nO,nO,nO)) - allocate(W_bca(nO,nO,nO), V_cba(nO,nO,nO), W_cba(nO,nO,nO)) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + call set_multiple_levels_omp(.False.) + ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & @@ -104,50 +102,48 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - call wall_time(ta) energy = 0d0 + !$OMP PARALLEL & + !$OMP PRIVATE(a,b,c,W_abc,W_cab,W_bca,W_cba,V_abc) & + !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & + !$OMP DEFAULT(SHARED) + allocate(W_abc(nO,nO,nO), W_cab(nO,nO,nO), V_abc(nO,nO,nO), & + W_bca(nO,nO,nO), W_cba(nO,nO,nO) ) + !$OMP DO do c = 1, nV do b = 1, nV do a = 1, nV + e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) + call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) - call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc) - call form_v_abc(nO,nV,c,b,a,t1,X_oovv,W_cba,V_cba) - !$OMP PARALLEL & - !$OMP SHARED(energy,nO,a,b,c,W_abc,W_cab,W_bca,V_abc,V_cba,f_o,f_v,delta_abc)& - !$OMP PRIVATE(i,j,k,e,delta) & - !$OMP DEFAULT(NONE) - e = 0d0 - !$OMP DO + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) do i = 1, nO do j = 1, nO do k = 1, nO delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - !energy = energy + (4d0 * W(i,j,k,a,b,c) + W(i,j,k,b,c,a) + W(i,j,k,c,a,b)) * (V(i,j,k,a,b,c) - V(i,j,k,c,b,a)) / (cc_space_f_o(i) + cc_space_f_o(j) + cc_space_f_o(k) - cc_space_f_v(a) - cc_space_f_v(b) - cc_space_f_v(c)) !delta_ooovvv(i,j,k,a,b,c) e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + * V_abc(i,j,k) * delta enddo enddo enddo - !$OMP END DO NOWAIT !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL - !$OMP END PARALLEL enddo enddo - call wall_time(tb) - write(*,'(F12.2,A5,F12.2,A2)') dble(i)/dble(nO)*100d0, '% in ', tb - ta, ' s' enddo + !$OMP END DO - energy = energy / 3d0 + deallocate(W_abc,V_abc,W_cab,W_bca,W_cba) + !$OMP END PARALLEL - deallocate(W_abc,V_abc,W_cab,V_cba,W_bca,X_vovv,X_ooov,T_voov,T_oovv) - !deallocate(V,W) + energy = energy / 3.d0 + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) end @@ -233,7 +229,7 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) call dgemm('T','N', nO, nO*nO, nO, -1.d0, & T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) -! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k +! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k call dgemm('T','N', nO*nO, nO, nO, -1.d0, & X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) @@ -261,31 +257,34 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) enddo enddo + deallocate(X,W_ikj) end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W,V) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t1(nO,nV) double precision, intent(in) :: T_ov(nO,nV) double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: W(nO,nO,nO) - double precision, intent(out) :: V(nO,nO,nO) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cba(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO) integer :: i,j,k do k = 1, nO do j = 1, nO do i = 1, nO - V(i,j,k) = W(i,j,k) & + V_abc(i,j,k) = W_abc(i,j,k) - W_cba(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & - + X_oovv(i,j,a,b) * T_ov(k,c) + + X_oovv(i,j,a,b) * T_ov(k,c) & + - X_oovv(j,k,b,a) * T_ov(i,c) & + - X_oovv(i,k,c,a) * T_ov(j,b) & + - X_oovv(i,j,c,b) * T_ov(k,a) enddo enddo enddo From c18bea7e817af0142e2fd76577c9f7d90a39e533 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 22:23:08 +0200 Subject: [PATCH 103/337] Merged 4 calls --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 109 ++++++++++++++++------------ 1 file changed, 64 insertions(+), 45 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 8b6db915..7f334a37 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -115,11 +115,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) - call form_w_abc(nO,nV,c,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_cba) - call form_w_abc(nO,nV,b,c,a,T_voov,T_oovv,X_vovv,X_ooov,W_bca) - call form_w_abc(nO,nV,c,a,b,T_voov,T_oovv,X_vovv,X_ooov,W_cab) - + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) do i = 1, nO do j = 1, nO @@ -147,112 +143,135 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end -subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) implicit none integer, intent(in) :: nO,nV,a,b,c - !double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision, intent(out) :: W_abc(nO,nO,nO) + double precision, intent(out) :: W_cba(nO,nO,nO) + double precision, intent(out) :: W_bca(nO,nO,nO) + double precision, intent(out) :: W_cab(nO,nO,nO) integer :: l,i,j,k,d - double precision, allocatable, dimension(:,:,:) :: W_ikj + double precision, allocatable, dimension(:,:,:,:) :: W_ikj double precision, allocatable :: X(:,:,:,:) - allocate(W_ikj(nO,nO,nO)) - allocate(X(nV,nO,nO,2)) + allocate(W_ikj(nO,nO,nO,4)) + allocate(X(nV,nO,nO,3)) do k=1,nO do i=1,nO do d=1,nV X(d,i,k,1) = T_voov(d,k,i,c) -! X(d,i,j,2) = T_voov(d,j,i,b) X(d,i,k,2) = T_voov(d,k,i,b) -! X(d,j,k,1) = T_voov(d,k,j,c) + X(d,i,k,3) = T_voov(d,k,i,a) enddo enddo enddo ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) ! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - call dgemm('T','N', nO, nO*nO, nV, 1.d0, & - X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,3), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,3), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) ! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, & - T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) deallocate(X) - allocate(X(nO,nO,nO,2)) + allocate(X(nO,nO,nO,3)) do k=1,nO do j=1,nO do l=1,nO X(l,j,k,1) = X_ooov(l,k,j,b) -! X(l,i,j,2) = X_ooov(l,j,i,a) X(l,j,k,2) = X_ooov(l,k,j,a) -! X(l,i,k,2) = X_ooov(l,k,i,a) + X(l,j,k,3) = X_ooov(l,k,j,c) enddo enddo enddo ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk - call dgemm('T','N', nO, nO*nO, nO, -1.d0, & - T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) ! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj - - call dgemm('T','N', nO, nO*nO, nO, -1.d0, & - T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,1), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,2), nO, 1.d0, W_cab, nO) ! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) ! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) ! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) ! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j - - call dgemm('T','N', nO*nO, nO, nO, -1.d0, & - X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) do k=1,nO do j=1,nO do i=1,nO - W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j) + W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,2) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,3) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,4) enddo enddo enddo From cad1da1768b7ab3d9a93b6d6439a0bb414fb8ab7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 13 May 2023 23:29:58 +0200 Subject: [PATCH 104/337] All permutations in ccsd_t_space_orb_abc.irp.f --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 168 +++++++++++++++++++--------- 1 file changed, 114 insertions(+), 54 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 7f334a37..65a04549 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -12,9 +12,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: W(:,:,:,:,:,:) double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), V_abc(:,:,:) - double precision, allocatable :: W_cab(:,:,:), W_cba(:,:,:) - double precision, allocatable :: W_bca(:,:,:) + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -103,26 +104,30 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL energy = 0d0 - !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,W_abc,W_cab,W_bca,W_cba,V_abc) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & - !$OMP DEFAULT(SHARED) - allocate(W_abc(nO,nO,nO), W_cab(nO,nO,nO), V_abc(nO,nO,nO), & - W_bca(nO,nO,nO), W_cba(nO,nO,nO) ) + !$OMP PARALLEL & + !$OMP PRIVATE(a,b,c) & + !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & + !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & + !$OMP DEFAULT(SHARED) + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) !$OMP DO do c = 1, nV do b = 1, nV do a = 1, nV e = 0d0 delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) do i = 1, nO do j = 1, nO do k = 1, nO delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * V_abc(i,j,k) * delta + * (V_abc(i,j,k) - V_cba(i,j,k)) * delta enddo enddo enddo @@ -134,7 +139,9 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO - deallocate(W_abc,V_abc,W_cab,W_bca,W_cba) + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + !$OMP END PARALLEL energy = energy / 3.d0 @@ -143,7 +150,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end -subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab) +subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none @@ -154,20 +161,22 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, double precision, intent(out) :: W_cba(nO,nO,nO) double precision, intent(out) :: W_bca(nO,nO,nO) double precision, intent(out) :: W_cab(nO,nO,nO) + double precision, intent(out) :: W_bac(nO,nO,nO) + double precision, intent(out) :: W_acb(nO,nO,nO) integer :: l,i,j,k,d double precision, allocatable, dimension(:,:,:,:) :: W_ikj double precision, allocatable :: X(:,:,:,:) - allocate(W_ikj(nO,nO,nO,4)) + allocate(W_ikj(nO,nO,nO,6)) allocate(X(nV,nO,nO,3)) do k=1,nO do i=1,nO do d=1,nV - X(d,i,k,1) = T_voov(d,k,i,c) + X(d,i,k,1) = T_voov(d,k,i,a) X(d,i,k,2) = T_voov(d,k,i,b) - X(d,i,k,3) = T_voov(d,k,i,a) + X(d,i,k,3) = T_voov(d,k,i,c) enddo enddo enddo @@ -175,44 +184,56 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,c), nV, 0.d0, W_acb, nO) ! T_voov(d,i,j,a) * X_vovv(d,k,b,c) : ij k call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_cba, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_acb, nO*nO) ! X_vovv(d,k,a,c) * T_voov(d,j,i,b) : k ji call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 1.d0, W_cba, nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 1.d0, W_acb, nO*nO) ! X_vovv(d,i,b,a) * T_voov(d,k,j,c) : i kj - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,1), nV, 1.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,3), nV, 1.d0, W_cba, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,3), nV, 1.d0, W_bca, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, X(1,1,1,3), nV, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, X(1,1,1,3), nV, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, X(1,1,1,1), nV, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, X(1,1,1,1), nV, 1.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, X(1,1,1,2), nV, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, X(1,1,1,2), nV, 1.d0, W_acb, nO) ! T_voov(d,k,i,c) * X_vovv(d,j,a,b) : ki j - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,a,b), nV, 0.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,3), nV, X_vovv(1,1,b,a), nV, 0.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,c,b), nV, 0.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,1), nV, X_vovv(1,1,b,c), nV, 0.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,c,a), nV, 0.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, X(1,1,1,2), nV, X_vovv(1,1,a,c), nV, 0.d0, W_ikj(1,1,1,6), nO*nO) ! T_voov(d,i,k,a) * X_vovv(d,j,c,b) : ik j call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,c,b), nV, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,c,a), nV, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,a,b), nV, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,b), nV, X_vovv(1,1,a,c), nV, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,c), nV, X_vovv(1,1,b,a), nV, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nV, 1.d0, T_voov(1,1,1,a), nV, X_vovv(1,1,b,c), nV, 1.d0, W_ikj(1,1,1,6), nO*nO) deallocate(X) @@ -221,8 +242,8 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, do k=1,nO do j=1,nO do l=1,nO - X(l,j,k,1) = X_ooov(l,k,j,b) - X(l,j,k,2) = X_ooov(l,k,j,a) + X(l,j,k,1) = X_ooov(l,k,j,a) + X(l,j,k,2) = X_ooov(l,k,j,b) X(l,j,k,3) = X_ooov(l,k,j,c) enddo enddo @@ -231,47 +252,61 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! - T_oovv(l,i,a,b) * X_ooov(l,j,k,c) : i jk call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X_ooov(1,1,1,c), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X_ooov(1,1,1,c), nO, 1.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X_ooov(1,1,1,a), nO, 1.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X_ooov(1,1,1,a), nO, 1.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X_ooov(1,1,1,b), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X_ooov(1,1,1,b), nO, 1.d0, W_acb, nO) ! - T_oovv(l,i,a,c) * X_ooov(l,k,j,b) : i kj - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,1), nO, 1.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,1), nO, 1.d0, W_cba, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,c), nO, X(1,1,1,2), nO, 1.d0, W_abc, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,c), nO, X(1,1,1,1), nO, 1.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,a), nO, X(1,1,1,2), nO, 1.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,b,a), nO, X(1,1,1,3), nO, 1.d0, W_bca, nO) - call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,2), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,c,b), nO, X(1,1,1,1), nO, 1.d0, W_cab, nO) + call dgemm('T','N', nO, nO*nO, nO, -1.d0, T_oovv(1,1,a,b), nO, X(1,1,1,3), nO, 1.d0, W_acb, nO) ! - X_ooov(l,i,j,b) * T_oovv(l,k,c,a) : ij k call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_cba, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_acb, nO*nO) ! - X_ooov(l,j,i,a) * T_oovv(l,k,c,b) : ji k - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_abc, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_bac, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_cba, nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_bca, nO*nO) call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_cab, nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_acb, nO*nO) ! - X_ooov(l,k,i,a) * T_oovv(l,j,b,c) : ki j - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,2), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,3), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X(1,1,1,1), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) ! - X_ooov(l,i,k,c) * T_oovv(l,j,b,a) : ik j call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,b,a), nO, 1.d0, W_ikj(1,1,1,1), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) - call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,c), nO, T_oovv(1,1,a,b), nO, 1.d0, W_ikj(1,1,1,2), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,b,c), nO, 1.d0, W_ikj(1,1,1,3), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,a), nO, T_oovv(1,1,c,b), nO, 1.d0, W_ikj(1,1,1,4), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,a,c), nO, 1.d0, W_ikj(1,1,1,5), nO*nO) + call dgemm('T','N', nO*nO, nO, nO, -1.d0, X_ooov(1,1,1,b), nO, T_oovv(1,1,c,a), nO, 1.d0, W_ikj(1,1,1,6), nO*nO) do k=1,nO do j=1,nO do i=1,nO W_abc(i,j,k) = W_abc(i,j,k) + W_ikj(i,k,j,1) - W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,2) - W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,3) - W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,4) + W_bac(i,j,k) = W_bac(i,j,k) + W_ikj(i,k,j,2) + W_cba(i,j,k) = W_cba(i,j,k) + W_ikj(i,k,j,3) + W_bca(i,j,k) = W_bca(i,j,k) + W_ikj(i,k,j,4) + W_cab(i,j,k) = W_cab(i,j,k) + W_ikj(i,k,j,5) + W_acb(i,j,k) = W_acb(i,j,k) + W_ikj(i,k,j,6) enddo enddo enddo @@ -282,28 +317,53 @@ end ! V_abc -subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba) +subroutine form_v_abc(nO,nV,a,b,c,T_ov,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) implicit none integer, intent(in) :: nO,nV,a,b,c double precision, intent(in) :: T_ov(nO,nV) double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cba(nO,nO,nO) - double precision, intent(out) :: V_abc(nO,nO,nO) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(out) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(out) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) integer :: i,j,k do k = 1, nO do j = 1, nO do i = 1, nO - V_abc(i,j,k) = W_abc(i,j,k) - W_cba(i,j,k) & + V_abc(i,j,k) = W_abc(i,j,k) & + X_oovv(j,k,b,c) * T_ov(i,a) & + X_oovv(i,k,a,c) * T_ov(j,b) & - + X_oovv(i,j,a,b) * T_ov(k,c) & - - X_oovv(j,k,b,a) * T_ov(i,c) & - - X_oovv(i,k,c,a) * T_ov(j,b) & - - X_oovv(i,j,c,b) * T_ov(k,a) + + X_oovv(i,j,a,b) * T_ov(k,c) + + V_cba(i,j,k) = W_cba(i,j,k) & + + X_oovv(j,k,b,a) * T_ov(i,c) & + + X_oovv(i,k,c,a) * T_ov(j,b) & + + X_oovv(i,j,c,b) * T_ov(k,a) + + V_bca(i,j,k) = W_bca(i,j,k) & + + X_oovv(j,k,c,a) * T_ov(i,b) & + + X_oovv(i,k,b,a) * T_ov(j,c) & + + X_oovv(i,j,b,c) * T_ov(k,a) + + V_cab(i,j,k) = W_cab(i,j,k) & + + X_oovv(j,k,a,b) * T_ov(i,c) & + + X_oovv(i,k,c,b) * T_ov(j,a) & + + X_oovv(i,j,c,a) * T_ov(k,b) + + V_bac(i,j,k) = W_bac(i,j,k) & + + X_oovv(j,k,a,c) * T_ov(i,b) & + + X_oovv(i,k,b,c) * T_ov(j,a) & + + X_oovv(i,j,b,a) * T_ov(k,c) + + V_acb(i,j,k) = W_acb(i,j,k) & + + X_oovv(j,k,c,b) * T_ov(i,a) & + + X_oovv(i,k,a,b) * T_ov(j,c) & + + X_oovv(i,j,a,c) * T_ov(k,b) + enddo enddo enddo From d4ba229e6fdb6d567dd0c0258cb14aa14fa6524d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 02:13:55 +0200 Subject: [PATCH 105/337] Symmetries in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 51 +++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 65a04549..a2e4ec7b 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,7 +19,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc + double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) @@ -105,7 +105,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = 0d0 !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c) & + !$OMP PRIVATE(a,b,c,x1) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & @@ -114,30 +114,55 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + e = 0d0 !$OMP DO - do c = 1, nV - do b = 1, nV - do a = 1, nV - e = 0d0 + do a = 1, nV + do b = 1, a-1 + do c = 1, b-1 delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) do i = 1, nO do j = 1, nO do k = 1, nO - delta = 1d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k))& - * (V_abc(i,j,k) - V_cba(i,j,k)) * delta + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & + (4d0 * W_bca(i,j,k) + W_cab(i,j,k) + W_abc(i,j,k)) * (V_bca(i,j,k) - V_acb(i,j,k)) + & + (4d0 * W_cba(i,j,k) + W_bac(i,j,k) + W_acb(i,j,k)) * (V_cba(i,j,k) - V_abc(i,j,k)) + & + (4d0 * W_cab(i,j,k) + W_abc(i,j,k) + W_bca(i,j,k)) * (V_cab(i,j,k) - V_bac(i,j,k)) + & + 0.d0) enddo enddo enddo - !$OMP CRITICAL - energy = energy + e - !$OMP END CRITICAL + enddo + enddo + + c = a + do b = 1, nV + delta_abc = f_v(a) + f_v(b) + f_v(c) + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + do i = 1, nO + do j = 1, nO + do k = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & + 0.d0) + enddo + enddo enddo enddo enddo !$OMP END DO + !$OMP CRITICAL + energy = energy + e + !$OMP END CRITICAL deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) @@ -184,7 +209,7 @@ subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca, ! X_vovv(d,i,c,a) * T_voov(d,j,k,b) : i jk call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,a), nV, T_voov(1,1,1,b), nV, 0.d0, W_abc, nO) - call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,a), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) + call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,c,b), nV, T_voov(1,1,1,a), nV, 0.d0, W_bac, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,c), nV, T_voov(1,1,1,b), nV, 0.d0, W_cba, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,a,b), nV, T_voov(1,1,1,c), nV, 0.d0, W_bca, nO) call dgemm('T','N', nO, nO*nO, nV, 1.d0, X_vovv(1,1,b,c), nV, T_voov(1,1,1,a), nV, 0.d0, W_cab, nO) From 2e54537f1547861586c3c078e8ce5b3e1a9df652 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 02:41:34 +0200 Subject: [PATCH 106/337] v3 of (T) is fast! --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index a2e4ec7b..462d4adf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -115,7 +115,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) e = 0d0 - !$OMP DO + !$OMP DO SCHEDULE(dynamic) do a = 1, nV do b = 1, a-1 do c = 1, b-1 @@ -142,6 +142,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) c = a do b = 1, nV + if (b == c) cycle delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) @@ -159,7 +160,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT + !$OMP CRITICAL energy = energy + e !$OMP END CRITICAL From df07c65980affa277b304a17d35f1636f598171a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 14 May 2023 10:07:50 +0200 Subject: [PATCH 107/337] Fixed trexio installation --- configure | 2 -- 1 file changed, 2 deletions(-) diff --git a/configure b/configure index 66bc9419..48e6fd12 100755 --- a/configure +++ b/configure @@ -215,7 +215,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ EOF @@ -229,7 +228,6 @@ EOF cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install - cp ${QP_ROOT}/include/trexio_f.f90 ${QP_ROOT}/src/ezfio_files EOF From c9f579483af138575cb7a37b9cf1fdb39f22c2fa Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 15 May 2023 00:31:28 +0200 Subject: [PATCH 108/337] added Gill grid --- src/ao_many_one_e_ints/listj1b.irp.f | 24 +- src/becke_numerical_grid/EZFIO.cfg | 12 + src/becke_numerical_grid/extra_grid.irp.f | 278 +++++++++++----- .../extra_grid_vector.irp.f | 15 +- src/becke_numerical_grid/grid_becke.irp.f | 314 ++++++++++++------ .../grid_becke_vector.irp.f | 35 +- .../integration_radial.irp.f | 142 ++++---- .../step_function_becke.irp.f | 43 ++- src/non_h_ints_mu/j12_nucl_utils.irp.f | 4 +- 9 files changed, 570 insertions(+), 297 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 93ac459e..02963605 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -7,17 +7,17 @@ BEGIN_PROVIDER [integer, List_all_comb_b2_size] PROVIDE j1b_type - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then List_all_comb_b2_size = 2**nucl_num - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then List_all_comb_b2_size = nucl_num + 1 else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -67,7 +67,7 @@ END_PROVIDER List_all_comb_b2_expo = 0.d0 List_all_comb_b2_cent = 0.d0 - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then do i = 1, List_all_comb_b2_size @@ -121,7 +121,7 @@ END_PROVIDER List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) enddo - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then List_all_comb_b2_coef( 1) = 1.d0 List_all_comb_b2_expo( 1) = 0.d0 @@ -136,7 +136,7 @@ END_PROVIDER else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -156,18 +156,18 @@ BEGIN_PROVIDER [ integer, List_all_comb_b3_size] implicit none double precision :: tmp - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then List_all_comb_b3_size = 3**nucl_num - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then tmp = 0.5d0 * dble(nucl_num) * (dble(nucl_num) + 3.d0) List_all_comb_b3_size = int(tmp) + 1 else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif @@ -230,7 +230,7 @@ END_PROVIDER List_all_comb_b3_expo = 0.d0 List_all_comb_b3_cent = 0.d0 - if(j1b_type .eq. 3) then + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then do i = 1, List_all_comb_b3_size @@ -287,7 +287,7 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo - elseif(j1b_type .eq. 4) then + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then ii = 1 List_all_comb_b3_coef( ii) = 1.d0 @@ -347,7 +347,7 @@ END_PROVIDER else - print *, 'j1b_type = ', j1b_pen, 'is not implemented' + print *, 'j1b_type = ', j1b_type, 'is not implemented' stop endif diff --git a/src/becke_numerical_grid/EZFIO.cfg b/src/becke_numerical_grid/EZFIO.cfg index 4083e0e7..7861f074 100644 --- a/src/becke_numerical_grid/EZFIO.cfg +++ b/src/becke_numerical_grid/EZFIO.cfg @@ -64,3 +64,15 @@ doc: Number of angular extra_grid points given from input. Warning, this number interface: ezfio,provider,ocaml default: 1202 +[rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + +[extra_rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + diff --git a/src/becke_numerical_grid/extra_grid.irp.f b/src/becke_numerical_grid/extra_grid.irp.f index db691e55..9bd24f22 100644 --- a/src/becke_numerical_grid/extra_grid.irp.f +++ b/src/becke_numerical_grid/extra_grid.irp.f @@ -1,96 +1,149 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_radial_grid] &BEGIN_PROVIDER [integer, n_points_extra_integration_angular] - implicit none - BEGIN_DOC - ! n_points_extra_radial_grid = number of radial grid points_extra per atom - ! - ! n_points_extra_integration_angular = number of angular grid points_extra per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_extra_grid_becke)then - select case (extra_grid_type_sgn) - case(0) - n_points_extra_radial_grid = 23 - n_points_extra_integration_angular = 170 - case(1) - n_points_extra_radial_grid = 50 - n_points_extra_integration_angular = 194 - case(2) - n_points_extra_radial_grid = 75 - n_points_extra_integration_angular = 302 - case(3) - n_points_extra_radial_grid = 99 - n_points_extra_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' - stop - end select -else - n_points_extra_radial_grid = my_n_pt_r_extra_grid - n_points_extra_integration_angular = my_n_pt_a_extra_grid -endif + + BEGIN_DOC + ! n_points_extra_radial_grid = number of radial grid points_extra per atom + ! + ! n_points_extra_integration_angular = number of angular grid points_extra per atom + ! + ! These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_extra_grid_becke)then + select case (extra_grid_type_sgn) + case(0) + n_points_extra_radial_grid = 23 + n_points_extra_integration_angular = 170 + case(1) + n_points_extra_radial_grid = 50 + n_points_extra_integration_angular = 194 + case(2) + n_points_extra_radial_grid = 75 + n_points_extra_integration_angular = 302 + case(3) + n_points_extra_radial_grid = 99 + n_points_extra_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_extra_radial_grid = my_n_pt_r_extra_grid + n_points_extra_integration_angular = my_n_pt_a_extra_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_extra_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points_extra per atom END_DOC + + implicit none n_points_extra_grid_per_atom = n_points_extra_integration_angular * n_points_extra_radial_grid END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_extra_radial, (n_points_extra_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_extra_integral] - implicit none BEGIN_DOC ! points_extra in [0,1] to map the radial integral [0,\infty] END_DOC + + implicit none + integer :: i + dr_radial_extra_integral = 1.d0/dble(n_points_extra_radial_grid-1) - integer :: i do i = 1, n_points_extra_radial_grid grid_points_extra_radial(i) = dble(i-1) * dr_radial_extra_integral enddo END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_extra_per_atom, (3,n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num)] + BEGIN_DOC ! x,y,z coordinates of grid points_extra used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_extra_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_extra_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_extra_per_atom = 0.d0 - ! explicit values of the grid points_extra centered around each atom - do k = 1, n_points_extra_integration_angular - grid_points_extra_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points_extra(k,1) * r - grid_points_extra_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points_extra(k,2) * r - grid_points_extra_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points_extra(k,3) * r + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_extra_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_extra_radial_grid-j+1)**2 + + ! explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Weight function at grid points_extra : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -99,11 +152,14 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration ! represented by the last dimension and the points_extra are labelled by the ! other dimensions. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points_extra in space ! that are referred to each atom do j = 1, nucl_num @@ -114,6 +170,7 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration r(1) = grid_points_extra_per_atom(1,l,k,j) r(2) = grid_points_extra_per_atom(2,l,k,j) r(3) = grid_points_extra_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points_extra in space, ou need to evaluate the P_n(r) do i = 1, nucl_num @@ -124,18 +181,19 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration enddo accu = 1.d0/accu weight_at_r_extra(l,k,j) = tmp_array(j) * accu + if(isnan(weight_at_r_extra(l,k,j)))then - print*,'isnan(weight_at_r_extra(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points_extra - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + print*,'isnan(weight_at_r_extra(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points_extra + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -144,35 +202,73 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points_extra in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral - if(isnan(final_weight_at_r_extra(k,i,j)))then - print*,'isnan(final_weight_at_r_extra(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + ! run over all points_extra in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& + * knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral + stop + endif + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + PROVIDE R_gill + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_extra_radial_grid) + + ! run over all points_extra in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_extra_radial_grid-i+1)**7 + + do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points_extra(k), weight_at_r_extra(k,i,j), contrib_integration + stop + endif + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index 3a5e6d3c..e4fc03b5 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -1,26 +1,35 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_final_grid] - implicit none + BEGIN_DOC ! Number of points_extra which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_extra_final_grid = 0 + do j = 1, nucl_num do i = 1, n_points_extra_radial_grid -1 do k = 1, n_points_extra_integration_angular - if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid)then + if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid) then cycle endif n_points_extra_final_grid += 1 enddo enddo enddo + print*,'n_points_extra_final_grid = ',n_points_extra_final_grid print*,'n max point = ',n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) ! call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_grid_points_extra, (3,n_points_extra_final_grid)] &BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid) ] &BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid) ] diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 79f15c9a..21b9f98d 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -1,103 +1,174 @@ + +! --- + BEGIN_PROVIDER [integer, n_points_radial_grid] &BEGIN_PROVIDER [integer, n_points_integration_angular] - implicit none - BEGIN_DOC - ! n_points_radial_grid = number of radial grid points per atom - ! - ! n_points_integration_angular = number of angular grid points per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_grid_becke)then - select case (grid_type_sgn) - case(0) - n_points_radial_grid = 23 - n_points_integration_angular = 170 - case(1) - n_points_radial_grid = 50 - n_points_integration_angular = 194 - case(2) - n_points_radial_grid = 75 - n_points_integration_angular = 302 - case(3) - n_points_radial_grid = 99 - n_points_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' - stop - end select -else - n_points_radial_grid = my_n_pt_r_grid - n_points_integration_angular = my_n_pt_a_grid -endif + + BEGIN_DOC + ! n_points_radial_grid = number of radial grid points per atom + ! + ! n_points_integration_angular = number of angular grid points per atom + ! + ! These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_grid_becke)then + select case (grid_type_sgn) + case(0) + n_points_radial_grid = 23 + n_points_integration_angular = 170 + case(1) + n_points_radial_grid = 50 + n_points_integration_angular = 194 + case(2) + n_points_radial_grid = 75 + n_points_integration_angular = 302 + case(3) + n_points_radial_grid = 99 + n_points_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_radial_grid = my_n_pt_r_grid + n_points_integration_angular = my_n_pt_a_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points per atom END_DOC + + implicit none + n_points_grid_per_atom = n_points_integration_angular * n_points_radial_grid END_PROVIDER -BEGIN_PROVIDER [integer , m_knowles] - implicit none +! --- + +BEGIN_PROVIDER [integer, m_knowles] + BEGIN_DOC ! value of the "m" parameter in the equation (7) of the paper of Knowles (JCP, 104, 1996) END_DOC + + implicit none + m_knowles = 3 + END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, R_gill] + + implicit none + + R_gill = 3.d0 + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, grid_points_radial, (n_points_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_integral] - implicit none BEGIN_DOC ! points in [0,1] to map the radial integral [0,\infty] END_DOC - dr_radial_integral = 1.d0/dble(n_points_radial_grid-1) - integer :: i + + implicit none + integer :: i + + dr_radial_integral = 1.d0 / dble(n_points_radial_grid-1) + do i = 1, n_points_radial_grid grid_points_radial(i) = dble(i-1) * dr_radial_integral enddo END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_per_atom, (3,n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! x,y,z coordinates of grid points used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_per_atom = 0.d0 - ! explicit values of the grid points centered around each atom - do k = 1, n_points_integration_angular - grid_points_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points(k,1) * r - grid_points_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points(k,2) * r - grid_points_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points(k,3) * r + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_radial_grid-j+1)**2 + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER -BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] +! --- + +BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! Weight function at grid points : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -106,11 +177,13 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p ! represented by the last dimension and the points are labelled by the ! other dimensions. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3), accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points in space ! that are referred to each atom do j = 1, nucl_num @@ -121,28 +194,30 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p r(1) = grid_points_per_atom(1,l,k,j) r(2) = grid_points_per_atom(2,l,k,j) r(3) = grid_points_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points in space, ou need to evaluate the P_n(r) do i = 1, nucl_num ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + tmp_array(i) = cell_function_becke(r, i) ! P_n(r) ! Then you compute the summ the P_n(r) function for each of the "r" points accu += tmp_array(i) enddo accu = 1.d0/accu weight_at_r(l,k,j) = tmp_array(j) * accu - if(isnan(weight_at_r(l,k,j)))then - print*,'isnan(weight_at_r(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + + if(isnan(weight_at_r(l,k,j))) then + print*,'isnan(weight_at_r(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -151,35 +226,76 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] -BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] BEGIN_DOC - ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. + ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral - if(isnan(final_weight_at_r(k,i,j)))then - print*,'isnan(final_weight_at_r(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points(k) , weight_at_r(k,i,j) , contrib_integration , dr_radial_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + final_weight_at_r = 0.d0 + + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + + do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x) & + * knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x)**2 + + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration + stop + endif + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_radial_grid) + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid - 1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_radial_grid-i+1)**7 + do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration, dr_radial_integral + stop + endif + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif END_PROVIDER + diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 343bd054..fd185641 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -21,22 +21,27 @@ BEGIN_PROVIDER [integer, n_points_final_grid] call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER - BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] -&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] - implicit none +! --- + + BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC -! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point -! -! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions -! -! index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point -! -! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices + ! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point + ! + ! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions + ! + ! index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point + ! + ! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices END_DOC - integer :: i,j,k,l,i_count - double precision :: r(3) + + implicit none + integer :: i, j, k, l, i_count + double precision :: r(3) + i_count = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -59,6 +64,8 @@ END_PROVIDER END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] implicit none BEGIN_DOC diff --git a/src/becke_numerical_grid/integration_radial.irp.f b/src/becke_numerical_grid/integration_radial.irp.f index 44c83070..3de151ab 100644 --- a/src/becke_numerical_grid/integration_radial.irp.f +++ b/src/becke_numerical_grid/integration_radial.irp.f @@ -1,71 +1,93 @@ - double precision function knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : -! the Log "m" function ( equation (7) in the paper ) - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m -!print*, x - knowles_function = -alpha * dlog(1.d0-x**m) - end - double precision function derivative_knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m - double precision :: f - f = x**(m-1) - derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) - end +! --- - BEGIN_PROVIDER [double precision, alpha_knowles, (100)] - implicit none - integer :: i - BEGIN_DOC -! Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) -! as a function of the nuclear charge - END_DOC +double precision function knowles_function(alpha, m, x) - ! H-He - alpha_knowles(1) = 5.d0 - alpha_knowles(2) = 5.d0 + BEGIN_DOC + ! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : + ! the Log "m" function ( equation (7) in the paper ) + END_DOC + + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m - ! Li-Be - alpha_knowles(3) = 7.d0 - alpha_knowles(4) = 7.d0 + !print*, x + knowles_function = -alpha * dlog(1.d0-x**m) - ! B-Ne - do i = 5, 10 - alpha_knowles(i) = 5.d0 - enddo + return +end - ! Na-Mg - do i = 11, 12 - alpha_knowles(i) = 7.d0 - enddo +! --- - ! Al-Ar - do i = 13, 18 - alpha_knowles(i) = 5.d0 - enddo +double precision function derivative_knowles_function(alpha, m, x) - ! K-Ca - do i = 19, 20 - alpha_knowles(i) = 7.d0 - enddo + BEGIN_DOC + ! Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points + END_DOC - ! Sc-Zn - do i = 21, 30 - alpha_knowles(i) = 5.d0 - enddo + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m + double precision :: f - ! Ga-Kr - do i = 31, 100 - alpha_knowles(i) = 7.d0 - enddo + f = x**(m-1) + derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) + + return +end + +! --- + +BEGIN_PROVIDER [double precision, alpha_knowles, (100)] + + BEGIN_DOC + ! Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) + ! as a function of the nuclear charge + END_DOC + + implicit none + integer :: i + + ! H-He + alpha_knowles(1) = 5.d0 + alpha_knowles(2) = 5.d0 + + ! Li-Be + alpha_knowles(3) = 7.d0 + alpha_knowles(4) = 7.d0 + + ! B-Ne + do i = 5, 10 + alpha_knowles(i) = 5.d0 + enddo + + ! Na-Mg + do i = 11, 12 + alpha_knowles(i) = 7.d0 + enddo + + ! Al-Ar + do i = 13, 18 + alpha_knowles(i) = 5.d0 + enddo + + ! K-Ca + do i = 19, 20 + alpha_knowles(i) = 7.d0 + enddo + + ! Sc-Zn + do i = 21, 30 + alpha_knowles(i) = 5.d0 + enddo + + ! Ga-Kr + do i = 31, 100 + alpha_knowles(i) = 7.d0 + enddo + +END_PROVIDER + +! --- - END_PROVIDER diff --git a/src/becke_numerical_grid/step_function_becke.irp.f b/src/becke_numerical_grid/step_function_becke.irp.f index 2905c6c0..6048c35f 100644 --- a/src/becke_numerical_grid/step_function_becke.irp.f +++ b/src/becke_numerical_grid/step_function_becke.irp.f @@ -20,31 +20,42 @@ double precision function f_function_becke(x) f_function_becke = 1.5d0 * x - 0.5d0 * x*x*x end -double precision function cell_function_becke(r,atom_number) - implicit none - double precision, intent(in) :: r(3) - integer, intent(in) :: atom_number +! --- + +double precision function cell_function_becke(r, atom_number) + BEGIN_DOC -! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) + ! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) ! r(1:3) :: x,y,z coordinantes of the current point END_DOC - double precision :: mu_ij,nu_ij - double precision :: distance_i,distance_j,step_function_becke - integer :: j - distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) + + implicit none + double precision, intent(in) :: r(3) + integer, intent(in) :: atom_number + integer :: j + double precision :: mu_ij, nu_ij + double precision :: distance_i, distance_j, step_function_becke + + distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) distance_i += (r(2) - nucl_coord_transp(2,atom_number) ) * (r(2) - nucl_coord_transp(2,atom_number)) distance_i += (r(3) - nucl_coord_transp(3,atom_number) ) * (r(3) - nucl_coord_transp(3,atom_number)) - distance_i = dsqrt(distance_i) + distance_i = dsqrt(distance_i) + cell_function_becke = 1.d0 do j = 1, nucl_num - if(j==atom_number)cycle - distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) - distance_j+= (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) - distance_j+= (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) - distance_j = dsqrt(distance_j) - mu_ij = (distance_i - distance_j)*nucl_dist_inv(atom_number,j) + if(j==atom_number) cycle + + distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) + distance_j += (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) + distance_j += (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) + distance_j = dsqrt(distance_j) + + mu_ij = (distance_i - distance_j) * nucl_dist_inv(atom_number,j) nu_ij = mu_ij + slater_bragg_type_inter_distance_ua(atom_number,j) * (1.d0 - mu_ij*mu_ij) + cell_function_becke *= step_function_becke(nu_ij) enddo + + return end diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index 079cb388..9b91a8ed 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -59,7 +59,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] else - print*, 'j1b_type = ', j1b_pen, 'is not implemented for v_1b' + print*, 'j1b_type = ', j1b_type, 'is not implemented for v_1b' stop endif @@ -158,7 +158,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] else - print*, 'j1b_type = ', j1b_pen, 'is not implemented' + print*, 'j1b_type = ', j1b_type, 'is not implemented' stop endif From 873d978348018e6e9774444c3532ffb45d323fb2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 13:06:06 +0200 Subject: [PATCH 109/337] Less multiplications in (T) --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 55 +++++++++++++++-------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 462d4adf..7c0ed929 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -36,10 +36,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo @@ -48,10 +48,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV T_voov(d,k,j,c) = t2(k,j,c,d) enddo @@ -63,7 +63,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -75,10 +75,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do i = 1, nO + !$OMP DO + do a = 1, nV do b = 1, nV - do a = 1, nV + do i = 1, nO do l = 1, nO T_oovv(l,i,a,b) = t2(i,l,a,b) enddo @@ -89,7 +89,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !X_oovv(j,k,b,c) * T1_vo(a,i) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do b = 1, nV do j = 1, nO @@ -122,18 +122,20 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do i = 1, nO + do k = 1, nO do j = 1, nO - do k = 1, nO + do i = 1, nO delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & - (4d0 * W_bca(i,j,k) + W_cab(i,j,k) + W_abc(i,j,k)) * (V_bca(i,j,k) - V_acb(i,j,k)) + & - (4d0 * W_cba(i,j,k) + W_bac(i,j,k) + W_acb(i,j,k)) * (V_cba(i,j,k) - V_abc(i,j,k)) + & - (4d0 * W_cab(i,j,k) + W_abc(i,j,k) + W_bca(i,j,k)) * (V_cab(i,j,k) - V_bac(i,j,k)) + & - 0.d0) + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) enddo enddo enddo @@ -146,15 +148,14 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do i = 1, nO + do k = 1, nO do j = 1, nO - do k = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + do i = 1, nO + delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) e = e + delta * ( & (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) + & - 0.d0) + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) enddo enddo enddo From 738140547974f4e1ec9cac4cb25fa24edc963cc1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 19:37:34 +0200 Subject: [PATCH 110/337] Removed collapse in ccsd --- src/ccsd/ccsd_space_orb_sub.irp.f | 150 ++++++++++++++-------------- src/ccsd/ccsd_t_space_orb_abc.irp.f | 36 +++---- src/utils_cc/update_t.irp.f | 4 +- 3 files changed, 93 insertions(+), 97 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index acd14034..75752f5c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -109,7 +109,7 @@ subroutine run_ccsd_space_orb call update_t1(nO,nV,cc_space_f_o,cc_space_f_v,r1,t1) call update_t2(nO,nV,cc_space_f_o,cc_space_f_v,r2,t2) else - print*,'Unkonw cc_method_method: '//cc_update_method + print*,'Unkown cc_method_method: '//cc_update_method endif call update_tau_space(nO,nV,t1,t2,tau) @@ -211,8 +211,8 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) !$omp default(none) e = 0d0 !$omp do - do i = 1, nO - do a = 1, nV + do a = 1, nV + do i = 1, nO e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) enddo enddo @@ -255,7 +255,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -373,7 +373,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -412,7 +412,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -452,7 +452,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -464,11 +464,11 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do nowait - !$omp do collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV - do u = 1, nO + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo @@ -504,8 +504,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) - !$omp do collapse(3) do u = 1, nO + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -513,8 +513,8 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('T','N', nO, nV, nO*nO*nV, & @@ -527,9 +527,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) max_r1 = 0d0 do a = 1, nV do i = 1, nO - if (dabs(r1(i,a)) > max_r1) then - max_r1 = dabs(r1(i,a)) - endif + max_r1 = max(dabs(r1(i,a)), max_r1) enddo enddo @@ -657,7 +655,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - !$omp do collapse(3) + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO @@ -727,7 +725,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -787,7 +785,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -863,7 +861,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -885,7 +883,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +919,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +955,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +977,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1014,8 +1012,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1023,8 +1021,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nV*nO*nV,nV,nO, & @@ -1041,7 +1039,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1079,7 +1077,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1116,8 +1114,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1125,8 +1123,8 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO,nO*nV*nO,nV, & @@ -1143,7 +1141,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1182,19 +1180,19 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO - X_ovvo(u,beta,a,i) = 0.5d0 * (2d0 * J1(u,a,beta,i) - K1(u,a,i,beta)) + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1216,7 +1214,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1252,7 +1250,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1262,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1286,7 +1284,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1319,7 +1317,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) - !$omp do collapse(3) + !$omp do do a = 1, nV do i = 1, nO do gam = 1, nV @@ -1331,7 +1329,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do beta = 1, nV do v = 1, nO do a = 1, nV @@ -1353,7 +1351,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1373,7 +1371,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2) & !$omp private(i,j,a,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do a = 1, nV do j = 1, nO @@ -1391,9 +1389,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do a = 1, nV do j = 1, nO do i = 1, nO - if (dabs(r2(i,j,a,b)) > max_r2) then - max_r2 = dabs(r2(i,j,a,b)) - endif + max_r2 = max(r2(i,j,a,b), max_r2) enddo enddo enddo @@ -1448,7 +1444,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1462,7 +1458,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do u = 1, nO @@ -1484,7 +1480,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO @@ -1553,7 +1549,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1564,8 +1560,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1573,8 +1569,8 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & @@ -1594,7 +1590,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do collapse(3) + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1658,7 +1654,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do - !$omp do collapse(1) + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1720,7 +1716,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do collapse(1) + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1788,8 +1784,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1797,10 +1793,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) + !$omp do collapse(2) do j = 1, nO do i = 1, nO do a = 1, nV @@ -1822,8 +1818,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Y_ovov) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1831,8 +1827,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovoo) @@ -1849,7 +1845,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1861,7 +1857,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do collapse(3) + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1886,8 +1882,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,12 +1891,12 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,11 +1904,11 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1920,8 +1916,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel call dgemm('N','T',nO*nV,nV*nO,nV*nO, & @@ -1933,8 +1929,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,J1,Z_ovvo) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) do i = 1, nO + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1942,8 +1938,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do !$omp end parallel deallocate(X_ovvo,Z_ovvo,Y_ovov) @@ -2003,7 +1999,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2015,8 +2011,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo !$omp end do nowait - !$omp do collapse(3) do i = 1, nO + !$omp do do a = 1, nV do j = 1, nO do b = 1, nV @@ -2024,11 +2020,11 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do nowait enddo - !$omp end do nowait - !$omp do collapse(3) do j = 1, nO + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -2036,8 +2032,8 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) enddo enddo enddo + !$omp end do enddo - !$omp end do !$omp end parallel call dgemm('N','N',nO*nV*nO,nV,nO, & @@ -2060,7 +2056,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp shared(nO,nV,K1,Z) & !$omp private(i,beta,a,u) & !$omp default(none) - !$omp do collapse(3) + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 462d4adf..5cf27568 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -36,10 +36,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO collapse(3) - do i = 1, nO - do a = 1, nV - do b = 1, nV + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO do d = 1, nV X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) enddo @@ -48,10 +48,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do j = 1, nO - do k = 1, nO - do c = 1, nV + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO do d = 1, nV T_voov(d,k,j,c) = t2(k,j,c,d) enddo @@ -63,7 +63,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -75,10 +75,10 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO collapse(3) - do i = 1, nO - do b = 1, nV - do a = 1, nV + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO do l = 1, nO T_oovv(l,i,a,b) = t2(i,l,a,b) enddo @@ -89,11 +89,11 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !X_oovv(j,k,b,c) * T1_vo(a,i) & - !$OMP DO collapse(3) + !$OMP DO do c = 1, nV do b = 1, nV - do j = 1, nO - do k = 1, nO + do k = 1, nO + do j = 1, nO X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) enddo enddo @@ -117,8 +117,8 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) e = 0d0 !$OMP DO SCHEDULE(dynamic) do a = 1, nV - do b = 1, a-1 - do c = 1, b-1 + do b = a+1, nV + do c = b+1, nV delta_abc = f_v(a) + f_v(b) + f_v(c) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) diff --git a/src/utils_cc/update_t.irp.f b/src/utils_cc/update_t.irp.f index dbd4f4bd..0cf8626c 100644 --- a/src/utils_cc/update_t.irp.f +++ b/src/utils_cc/update_t.irp.f @@ -22,7 +22,7 @@ subroutine update_t1(nO,nV,f_o,f_v,r1,t1) !$OMP SHARED(nO,nV,t1,r1,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,a) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(1) + !$OMP DO do a = 1, nV do i = 1, nO t1(i,a) = t1(i,a) - r1(i,a) / (f_o(i) - f_v(a) - cc_level_shift) @@ -57,7 +57,7 @@ subroutine update_t2(nO,nV,f_o,f_v,r2,t2) !$OMP SHARED(nO,nV,t2,r2,cc_level_shift,f_o,f_v) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO From 5b427641a66047513227fc1ed9912f8784a17630 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 15 May 2023 19:46:06 +0200 Subject: [PATCH 111/337] Inlined multiply_poly --- src/ao_two_e_ints/two_e_integrals.irp.f | 232 +++++++++++++++++++++--- src/utils/integration.irp.f | 129 +++++++++++-- 2 files changed, 317 insertions(+), 44 deletions(-) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 83fbadfd..4c3c6190 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -563,8 +563,20 @@ double precision function general_primitive_integral(dim, & d_poly(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) +! call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp) + integer :: ib, ic + if (ior(n_Ix,n_Iy) >= 0) then + do ib=0,n_Ix + do ic = 0,n_Iy + d_poly(ib+ic) = d_poly(ib+ic) + Iy_pol(ic) * Ix_pol(ib) + enddo + enddo + + do n_pt_tmp = n_Ix+n_Iy, 0, -1 + if (d_poly(n_pt_tmp) /= 0.d0) exit + enddo + endif + if (n_pt_tmp == -1) then return endif @@ -573,8 +585,21 @@ double precision function general_primitive_integral(dim, & d1(i)=0.d0 enddo - !DIR$ FORCEINLINE - call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) +! call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out) + if (ior(n_pt_tmp,n_Iz) >= 0) then + ! Bottleneck here + do ib=0,n_pt_tmp + do ic = 0,n_Iz + d1(ib+ic) = d1(ib+ic) + Iz_pol(ic) * d_poly(ib) + enddo + enddo + + do n_pt_out = n_pt_tmp+n_Iz, 0, -1 + if (d1(n_pt_out) /= 0.d0) exit + enddo + endif + + double precision :: rint_sum accu = accu + rint_sum(n_pt_out,const,d1) @@ -921,8 +946,20 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= dble(a-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -943,8 +980,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif endif ny=0 @@ -961,9 +1009,19 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt call I_x1_pol_mult_recurs(a-1,c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) endif - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1001,8 +1059,20 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 @@ -1012,8 +1082,19 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end @@ -1040,8 +1121,20 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) nx = 0 call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_10,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_10,2,d,nd) + if (nx >= 0) then + integer :: ib + do ib=0,nx + d(ib ) = d(ib ) + B_10(0) * X(ib) + d(ib+1) = d(ib+1) + B_10(1) * X(ib) + d(ib+2) = d(ib+2) + B_10(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1059,8 +1152,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) enddo endif - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_00,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_00(0) * X(ib) + d(ib+1) = d(ib+1) + B_00(1) * X(ib) + d(ib+2) = d(ib+2) + B_00(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1070,9 +1174,19 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) !DIR$ FORCEINLINE call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,C_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,C_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + C_00(0) * Y(ib) + d(ib+1) = d(ib+1) + C_00(1) * Y(ib) + d(ib+2) = d(ib+2) + C_00(2) * Y(ib) + enddo + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1119,8 +1233,21 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + integer :: ib + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif + return case default @@ -1137,8 +1264,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) X(ix) *= dble(c-1) enddo - !DIR$ FORCEINLINE - call multiply_poly(X,nx,B_01,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(X,nx,B_01,2,d,nd) + if (nx >= 0) then + do ib=0,nx + d(ib ) = d(ib ) + B_01(0) * X(ib) + d(ib+1) = d(ib+1) + B_01(1) * X(ib) + d(ib+2) = d(ib+2) + B_01(2) * X(ib) + enddo + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1147,8 +1285,19 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) - !DIR$ FORCEINLINE - call multiply_poly(Y,ny,D_00,2,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly(Y,ny,D_00,2,d,nd) + if (ny >= 0) then + do ib=0,ny + d(ib ) = d(ib ) + D_00(0) * Y(ib) + d(ib+1) = d(ib+1) + D_00(1) * Y(ib) + d(ib+2) = d(ib+2) + D_00(2) * Y(ib) + enddo + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + endif end select end @@ -1206,3 +1355,34 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) enddo end + + +subroutine multiply_poly_local(b,nb,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb, nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:nc) + double precision, intent(inout) :: d(0:nb+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + + do ib=0,nb + do ic = 0,nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 15d79622..c8a36775 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -428,6 +428,112 @@ end subroutine +subroutine multiply_poly_0c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:0+nc) + + integer :: ic + + do ic = 0,nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_1c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ic, id + if(nc < 0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + enddo + + do nd = nc+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_2c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ic, id, k + if (nc <0) return + + do ic = 0,nc + d( ic) = d( ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(2+ic) + c(ic) * b(2) + enddo + + do nd = nc+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_3c(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:3), c(0:nc) + double precision, intent(inout) :: d(0:3+nc) + + integer :: ic, id + if (nc <0) return + + do ic = 1,nc + d( ic) = d(1+ic) + c(ic) * b(0) + d(1+ic) = d(1+ic) + c(ic) * b(1) + d(2+ic) = d(1+ic) + c(ic) * b(2) + d(3+ic) = d(1+ic) + c(ic) * b(3) + enddo + + do nd = nc+3,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + subroutine multiply_poly(b,nb,c,nc,d,nd) @@ -444,29 +550,16 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ndtmp integer :: ib, ic, id, k - if(ior(nc,nb) >= 0) then ! True if nc>=0 and nb>=0 - continue - else - return - endif - ndtmp = nb+nc + if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 - do ic = 0,nc - d(ic) = d(ic) + c(ic) * b(0) - enddo - - do ib=1,nb - d(ib) = d(ib) + c(0) * b(ib) - do ic = 1,nc + do ib=0,nb + do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib) enddo enddo - do nd = ndtmp,0,-1 - if (d(nd) == 0.d0) then - cycle - endif - exit + do nd = nb+nc,0,-1 + if (d(nd) /= 0.d0) exit enddo end From e3c0df574ee9bed8f2de3c21dc4506fd34fc7b7b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 01:40:40 +0200 Subject: [PATCH 112/337] Implementing stochastic (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 153 ++++++++---- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 320 ++++++++++++++++++++++++++ 3 files changed, 428 insertions(+), 49 deletions(-) create mode 100644 src/ccsd/ccsd_t_space_orb_stoch.irp.f diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 75752f5c..29ecca1c 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -169,7 +169,9 @@ subroutine run_ccsd_space_orb ! New print*,'Computing (T) correction...' call wall_time(ta) - call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & +! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) call wall_time(tb) print*,'Time: ',tb-ta, ' s' diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 70900738..294296bf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,14 +19,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - call set_multiple_levels_omp(.False.) - - ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & @@ -36,7 +35,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO + !$OMP DO do a = 1, nV do b = 1, nV do i = 1, nO @@ -48,7 +47,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO + !$OMP DO do c = 1, nV do j = 1, nO do k = 1, nO @@ -63,7 +62,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -103,12 +102,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - energy = 0d0 + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,x1) & + !$OMP PRIVATE(a,b,c,e) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & !$OMP DEFAULT(SHARED) allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & @@ -119,46 +119,18 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV do b = a+1, nV do c = b+1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & - W_bca(i,j,k) - W_bac(i,j,k) + & - W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & - W_cba(i,j,k) - W_cab(i,j,k) + & - W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & - W_acb(i,j,k) - W_abc(i,j,k) + & - W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - enddo - c = a - do b = 1, nV - if (b == c) cycle - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -178,6 +150,91 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + +end + subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f new file mode 100644 index 00000000..e8fae5cd --- /dev/null +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -0,0 +1,320 @@ +! Main + +subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: t2(nO,nO,nV,nV) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) + double precision, intent(out) :: energy + + double precision, allocatable :: W(:,:,:,:,:,:) + double precision, allocatable :: V(:,:,:,:,:,:) + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) + double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) + integer :: i,j,k,l,a,b,c,d + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) + + allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) + allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & + !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & + !$OMP PRIVATE(a,b,c,d,i,j,k,l) & + !$OMP DEFAULT(NONE) + + !v_vvvo(b,a,d,i) * t2(k,j,c,d) & + !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) + + !$OMP DO + do a = 1, nV + do b = 1, nV + do i = 1, nO + do d = 1, nV + X_vovv(d,i,b,a) = v_vvvo(b,a,d,i) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do c = 1, nV + do j = 1, nO + do k = 1, nO + do d = 1, nV + T_voov(d,k,j,c) = t2(k,j,c,d) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !v_vooo(c,j,k,l) * t2(i,l,a,b) & + !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & + + !$OMP DO + do c = 1, nV + do k = 1, nO + do j = 1, nO + do l = 1, nO + X_ooov(l,j,k,c) = v_vooo(c,j,k,l) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP DO + do b = 1, nV + do a = 1, nV + do i = 1, nO + do l = 1, nO + T_oovv(l,i,a,b) = t2(i,l,a,b) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !X_oovv(j,k,b,c) * T1_vo(a,i) & + + !$OMP DO + do c = 1, nV + do b = 1, nV + do k = 1, nO + do j = 1, nO + X_oovv(j,k,b,c) = v_vvoo(b,c,j,k) + enddo + enddo + enddo + enddo + !$OMP END DO nowait + + !$OMP END PARALLEL + + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + + double precision, allocatable :: memo(:), Pabc(:), waccu(:) + logical , allocatable :: computed(:) + integer*2 , allocatable :: abc(:,:) + integer*8 :: Nabc, i8 + integer*8, allocatable :: iorder(:) + double precision :: eocc + double precision :: Pabc_norm, sum_w + + + ! Prepare table of triplets (a,b,c) + + Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV + allocate (memo(Nabc), computed(Nabc), Pabc(Nabc), waccu(0:Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) + +! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) + memo(:) = 0.d0 + computed(:) = .False. + Nabc = 0_8 + do a = 1, nV + do b = a+1, nV + do c = b+1, nV + Nabc = Nabc + 1_8 +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))*(f_v(a)*f_v(b)*f_v(c))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))**2) + Pabc(Nabc) = 1.d0/(f_v(a) + f_v(b) + f_v(c)) + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = c + enddo + + Nabc = Nabc + 1_8 + abc(1,Nabc) = a + abc(2,Nabc) = b + abc(3,Nabc) = a +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))*(f_v(a)*f_v(b)*f_v(a))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))**2) + Pabc(Nabc) = 1.d0/(2.d0*f_v(a) + f_v(b)) + + Nabc = Nabc + 1_8 + abc(1,Nabc) = b + abc(2,Nabc) = a + abc(3,Nabc) = b +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))*(f_v(b)*f_v(a)*f_v(b))**(1.d0/2.d0)) +! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))**2) + Pabc(Nabc) = 1.d0/(f_v(a) + 2.d0*f_v(b)) + enddo + enddo + + do i8=1,Nabc + iorder(i8) = i8 + enddo + + ! Sort triplets in decreasing Pabc + call dsort_big(Pabc, iorder, Nabc) + + ! Normalize + Pabc_norm = 0.d0 + do i8=Nabc,1,-1 + Pabc_norm = Pabc_norm + Pabc(i8) + enddo + Pabc_norm = 1.d0/Pabc_norm + do i8=Nabc,1,-1 + Pabc(i8) = Pabc(i8) * Pabc_norm + enddo + + call i8set_order_big(abc, iorder, Nabc) + + + ! Cumulative distribution for sampling + waccu(Nabc) = 0.d0 + sum_w = 0.d0 + do i8=Nabc-1,1,-1 + waccu(i8) = waccu(i8+1) - Pabc(i8) + enddo + waccu(:) = waccu(:) + 1.d0 + waccu(0) = 0.d0 + + Pabc(:) = 1.d0/Pabc(:) * (1.d0/3.d0) + + logical :: converged + double precision :: ET, ET2, eta, variance, average, error, sample + integer*8 :: isample, ieta, Ncomputed + integer*8, external :: find_sample + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + + converged = .False. + ET = 0.d0 + ET2 = 0.d0 + Ncomputed = 0_8 + isample = 0_8 + + average = 0.d0 + variance = 0.d0 + double precision :: t00, t01 + call wall_time(t00) +! do ieta=1,Nabc + do while (.not.converged) + call random_number(eta) +! eta = eta/dble(1000) +! do k=0,1000-1 +! ieta = find_sample(eta+dble(k)/dble(1000),waccu,Nabc) + ieta = find_sample(eta,waccu,Nabc) + isample = isample+1_8 + + if (.not.computed(ieta)) then + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + endif + computed(ieta) = .True. + Ncomputed += 1_8 + call wall_time(t01) + if (t01-t00 > 1.d0) then + t00 = t01 + print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + endif +! print *, memo(ieta), Pabc(ieta), memo(ieta) * Pabc(ieta) + endif + sample = memo(ieta) * Pabc(ieta) + ET = ET + sample + ET2 = ET2 + sample*sample + average = ET/dble(isample) + variance = ET2/dble(isample) - average*average + converged = (Ncomputed >= (Nabc*90_8)/100_8) .or. (isample>=1000*Nabc) +! enddo + enddo + print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + energy = average + +! !$OMP PARALLEL & +! !$OMP PRIVATE(a,b,c,e) & +! !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & +! !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & +! !$OMP DEFAULT(SHARED) +! allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & +! W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & +! V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & +! V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) +! e = 0d0 +! !$OMP DO SCHEDULE(dynamic) +! do a = 1, nV +! do b = a+1, nV +! do c = b+1, nV +! e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & +! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & +! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) +! enddo +! enddo +! +! do b = 1, nV +! if (b == a) cycle +! e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & +! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & +! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) +! enddo +! enddo +! !$OMP END DO NOWAIT +! +! !$OMP CRITICAL +! energy = energy + e +! !$OMP END CRITICAL +! +! deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & +! V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) +! +! !$OMP END PARALLEL + + deallocate(X_vovv,X_ooov,T_voov,T_oovv) +end + + +integer*8 function find_sample(v, w, n) + implicit none + BEGIN_DOC +! Finds sample v in weights w + END_DOC + integer*8, intent(in) :: n + double precision, intent(in) :: v, w(0:n) + integer*8 :: i,l,r + + l=0 + r=n + + do while(r-l > 1) + i = shiftr(r+l,1) + if(w(i) < v) then + l = i + else + r = i + end if + end do + i = r + do r=i+1,n + if (w(r) /= w(i)) then + exit + endif + enddo + find_sample = r-1 +end function + From 134b6d016301d41ca78dfed2443118616d849ec2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 01:43:32 +0200 Subject: [PATCH 113/337] Adding tasks --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 153 +++++++++++++++++++--------- 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 70900738..294296bf 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -19,14 +19,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb, delta, delta_abc, x1, x2, x3 + double precision :: e,ta,tb + + call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) allocate(T_voov(nV,nO,nO,nV),T_oovv(nO,nO,nV,nV)) - call set_multiple_levels_omp(.False.) - - ! Temporary arrays !$OMP PARALLEL & !$OMP SHARED(nO,nV,T_voov,T_oovv,X_vovv,X_ooov,X_oovv, & !$OMP t1,t2,v_vvvo,v_vooo,v_vvoo) & @@ -36,7 +35,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vvvo(b,a,d,i) * t2(k,j,c,d) & !X_vovv(d,i,b,a,i) * T_voov(d,j,c,k) - !$OMP DO + !$OMP DO do a = 1, nV do b = 1, nV do i = 1, nO @@ -48,7 +47,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) enddo !$OMP END DO nowait - !$OMP DO + !$OMP DO do c = 1, nV do j = 1, nO do k = 1, nO @@ -63,7 +62,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !v_vooo(c,j,k,l) * t2(i,l,a,b) & !X_ooov(l,j,k,c) * T_oovv(l,i,a,b) & - !$OMP DO + !$OMP DO do c = 1, nV do k = 1, nO do j = 1, nO @@ -103,12 +102,13 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP END PARALLEL - energy = 0d0 + double precision, external :: ccsd_t_task_aba + double precision, external :: ccsd_t_task_abc + !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,x1) & + !$OMP PRIVATE(a,b,c,e) & !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP PRIVATE(i,j,k,e,delta,delta_abc) & !$OMP DEFAULT(SHARED) allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & @@ -119,46 +119,18 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) do a = 1, nV do b = a+1, nV do c = b+1, nV - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & - W_bca(i,j,k) - W_bac(i,j,k) + & - W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & - W_cba(i,j,k) - W_cab(i,j,k) + & - W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & - W_acb(i,j,k) - W_abc(i,j,k) + & - W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - enddo - c = a - do b = 1, nV - if (b == c) cycle - delta_abc = f_v(a) + f_v(b) + f_v(c) - call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) - call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) - do k = 1, nO - do j = 1, nO - do i = 1, nO - delta = 1.0d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) - e = e + delta * ( & - (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & - (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & - (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) - enddo - enddo - enddo + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & + V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & + W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -178,6 +150,91 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) end +double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * (W_abc(i,j,k) - W_cba(i,j,k)) + & + W_bca(i,j,k) - W_bac(i,j,k) + & + W_cab(i,j,k) - W_acb(i,j,k) ) * (V_abc(i,j,k) - V_cba(i,j,k)) +& + (4d0 * (W_acb(i,j,k) - W_bca(i,j,k)) + & + W_cba(i,j,k) - W_cab(i,j,k) + & + W_bac(i,j,k) - W_abc(i,j,k) ) * (V_acb(i,j,k) - V_bca(i,j,k)) +& + (4d0 * (W_bac(i,j,k) - W_cab(i,j,k)) + & + W_acb(i,j,k) - W_abc(i,j,k) + & + W_cba(i,j,k) - W_bca(i,j,k) ) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + enddo + enddo + enddo + +end + +double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& + V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & + W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & + X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) + implicit none + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) + double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) + double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) + double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) + double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + + double precision :: delta, delta_abc + integer :: i,j,k + + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + + call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) + + call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + + do k = 1, nO + do j = 1, nO + do i = 1, nO + delta = 1.d0 / (f_o(i) + f_o(j) + f_o(k) - delta_abc) + e = e + delta * ( & + (4d0 * W_abc(i,j,k) + W_bca(i,j,k) + W_cab(i,j,k)) * (V_abc(i,j,k) - V_cba(i,j,k)) + & + (4d0 * W_acb(i,j,k) + W_cba(i,j,k) + W_bac(i,j,k)) * (V_acb(i,j,k) - V_bca(i,j,k)) + & + (4d0 * W_bac(i,j,k) + W_acb(i,j,k) + W_cba(i,j,k)) * (V_bac(i,j,k) - V_cab(i,j,k)) ) + + enddo + enddo + enddo + +end + subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) implicit none From de07f73ed9da98850002c459015ffc9e1868ed16 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 18:32:15 +0200 Subject: [PATCH 114/337] Semi-stochastic (T) OK --- src/ccsd/ccsd_t_space_orb_abc.irp.f | 101 ++++---- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 318 ++++++++++++++------------ 2 files changed, 224 insertions(+), 195 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 294296bf..1aab6bd7 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -10,12 +10,6 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) - double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) - double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) - double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -105,32 +99,22 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) double precision, external :: ccsd_t_task_aba double precision, external :: ccsd_t_task_abc - !$OMP PARALLEL & - !$OMP PRIVATE(a,b,c,e) & - !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & - !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & - !$OMP DEFAULT(SHARED) - allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & - W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & - V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & - V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 !$OMP DO SCHEDULE(dynamic) do a = 1, nV do b = a+1, nV do c = b+1, nV - e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) enddo - e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) + e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) + + e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) - e = e + ccsd_t_task_aba(b,a,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) enddo enddo !$OMP END DO NOWAIT @@ -139,9 +123,6 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) energy = energy + e !$OMP END CRITICAL - deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & - V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) - !$OMP END PARALLEL energy = energy / 3.d0 @@ -151,30 +132,34 @@ end double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& - V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & - W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) implicit none - integer, intent(in) :: nO,nV,a,b,c - double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) - double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) - double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) - double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) - double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) - double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + integer, intent(in) :: nO,nV,a,b,c + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision :: delta, delta_abc integer :: i,j,k - delta_abc = f_v(a) + f_v(b) + f_v(c) - e = 0.d0 + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) call form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,c,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + delta_abc = f_v(a) + f_v(b) + f_v(c) + e = 0.d0 + do k = 1, nO do j = 1, nO do i = 1, nO @@ -193,33 +178,40 @@ double precision function ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,& enddo enddo + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + end double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& - V_abc,V_acb,V_bac,V_bca,V_cab,V_cba, & - W_abc,W_acb,W_bac,W_bca,W_cab,W_cba, & X_ooov,X_oovv,X_vovv,f_o,f_v) result(e) implicit none - integer, intent(in) :: nO,nV,a,b - double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) - double precision, intent(in) :: X_oovv(nO,nO,nV,nV) - double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) - double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) - double precision, intent(in) :: W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO) - double precision, intent(in) :: W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO) - double precision, intent(in) :: V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO) - double precision, intent(in) :: V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) + integer, intent(in) :: nO,nV,a,b + double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) + double precision, intent(in) :: X_oovv(nO,nO,nV,nV) + double precision, intent(in) :: T_voov(nV,nO,nO,nV), T_oovv(nO,nO,nV,nV) + double precision, intent(in) :: X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV) double precision :: delta, delta_abc integer :: i,j,k - delta_abc = f_v(a) + f_v(b) + f_v(a) - e = 0.d0 + double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) + double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) + double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) + double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) + + allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & + W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & + V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & + V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) call form_w_abc(nO,nV,a,b,a,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) call form_v_abc(nO,nV,a,b,a,t1,X_oovv,W_abc,V_abc,W_cba,V_cba,W_bca,V_bca,W_cab,V_cab,W_bac,V_bac,W_acb,V_acb) + delta_abc = f_v(a) + f_v(b) + f_v(a) + e = 0.d0 + do k = 1, nO do j = 1, nO do i = 1, nO @@ -233,6 +225,9 @@ double precision function ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,& enddo enddo + deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & + V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) + end subroutine form_w_abc(nO,nV,a,b,c,T_voov,T_oovv,X_vovv,X_ooov,W_abc,W_cba,W_bca,W_cab,W_bac,W_acb) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index e8fae5cd..0081e9e7 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -1,5 +1,4 @@ ! Main - subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) implicit none @@ -10,12 +9,6 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) double precision, intent(out) :: energy - double precision, allocatable :: W(:,:,:,:,:,:) - double precision, allocatable :: V(:,:,:,:,:,:) - double precision, allocatable :: W_abc(:,:,:), W_cab(:,:,:), W_bca(:,:,:) - double precision, allocatable :: W_bac(:,:,:), W_cba(:,:,:), W_acb(:,:,:) - double precision, allocatable :: V_abc(:,:,:), V_cab(:,:,:), V_bca(:,:,:) - double precision, allocatable :: V_bac(:,:,:), V_cba(:,:,:), V_acb(:,:,:) double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d @@ -104,33 +97,32 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, external :: ccsd_t_task_aba double precision, external :: ccsd_t_task_abc +! logical, external :: omp_test_lock double precision, allocatable :: memo(:), Pabc(:), waccu(:) - logical , allocatable :: computed(:) + integer*8, allocatable :: sampled(:) +! integer(omp_lock_kind), allocatable :: lock(:) integer*2 , allocatable :: abc(:,:) integer*8 :: Nabc, i8 integer*8, allocatable :: iorder(:) double precision :: eocc - double precision :: Pabc_norm, sum_w + double precision :: norm + integer :: kiter, isample ! Prepare table of triplets (a,b,c) Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV - allocate (memo(Nabc), computed(Nabc), Pabc(Nabc), waccu(0:Nabc)) - allocate (abc(4,Nabc), iorder(Nabc)) + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc)) + allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) ! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) - memo(:) = 0.d0 - computed(:) = .False. Nabc = 0_8 do a = 1, nV do b = a+1, nV do c = b+1, nV Nabc = Nabc + 1_8 -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))*(f_v(a)*f_v(b)*f_v(c))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(c))**2) - Pabc(Nabc) = 1.d0/(f_v(a) + f_v(b) + f_v(c)) + Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) abc(1,Nabc) = a abc(2,Nabc) = b abc(3,Nabc) = c @@ -140,17 +132,13 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ abc(1,Nabc) = a abc(2,Nabc) = b abc(3,Nabc) = a -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))*(f_v(a)*f_v(b)*f_v(a))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(a))**2) - Pabc(Nabc) = 1.d0/(2.d0*f_v(a) + f_v(b)) + Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) Nabc = Nabc + 1_8 abc(1,Nabc) = b abc(2,Nabc) = a abc(3,Nabc) = b -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))*(f_v(b)*f_v(a)*f_v(b))**(1.d0/2.d0)) -! Pabc(Nabc) = 1.d0/((f_v(a) + f_v(b) + f_v(b))**2) - Pabc(Nabc) = 1.d0/(f_v(a) + 2.d0*f_v(b)) + Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) enddo enddo @@ -162,13 +150,13 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call dsort_big(Pabc, iorder, Nabc) ! Normalize - Pabc_norm = 0.d0 + norm = 0.d0 do i8=Nabc,1,-1 - Pabc_norm = Pabc_norm + Pabc(i8) + norm = norm + Pabc(i8) enddo - Pabc_norm = 1.d0/Pabc_norm - do i8=Nabc,1,-1 - Pabc(i8) = Pabc(i8) * Pabc_norm + norm = 1.d0/norm + do i8=1,Nabc + Pabc(i8) = Pabc(i8) * norm enddo call i8set_order_big(abc, iorder, Nabc) @@ -176,145 +164,191 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ ! Cumulative distribution for sampling waccu(Nabc) = 0.d0 - sum_w = 0.d0 do i8=Nabc-1,1,-1 - waccu(i8) = waccu(i8+1) - Pabc(i8) + waccu(i8) = waccu(i8+1) - Pabc(i8+1) enddo waccu(:) = waccu(:) + 1.d0 - waccu(0) = 0.d0 - Pabc(:) = 1.d0/Pabc(:) * (1.d0/3.d0) + logical :: converged, do_comp + double precision :: eta, variance, error, sample + double precision :: t00, t01 + integer*8 :: ieta, Ncomputed + integer*8, external :: binary_search - logical :: converged - double precision :: ET, ET2, eta, variance, average, error, sample - integer*8 :: isample, ieta, Ncomputed - integer*8, external :: find_sample + integer :: nbuckets + nbuckets = 100 - allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & - W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & - V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & - V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) + double precision, allocatable :: wsum(:) + allocate(wsum(nbuckets)) converged = .False. - ET = 0.d0 - ET2 = 0.d0 Ncomputed = 0_8 - isample = 0_8 - average = 0.d0 + energy = 0.d0 variance = 0.d0 - double precision :: t00, t01 - call wall_time(t00) -! do ieta=1,Nabc - do while (.not.converged) - call random_number(eta) -! eta = eta/dble(1000) -! do k=0,1000-1 -! ieta = find_sample(eta+dble(k)/dble(1000),waccu,Nabc) - ieta = find_sample(eta,waccu,Nabc) - isample = isample+1_8 + memo(:) = 0.d0 + sampled(:) = -1_8 - if (.not.computed(ieta)) then + integer*8 :: ileft, iright, imin + ileft = 1_8 + iright = Nabc + integer*8, allocatable :: bounds(:,:) + + allocate (bounds(2,nbuckets)) + do isample=1,nbuckets + eta = 1.d0/dble(nbuckets) * dble(isample) + ieta = binary_search(waccu,eta,Nabc,ileft,iright) + bounds(1,isample) = ileft + bounds(2,isample) = ieta + ileft = ieta+1 + wsum(isample) = sum( Pabc(bounds(1,isample):bounds(2,isample) ) ) + enddo + + Pabc(:) = 1.d0/Pabc(:) + + call wall_time(t00) + imin = 1_8 + !$OMP PARALLEL & + !$OMP PRIVATE(ieta,eta,a,b,c,kiter,isample) & + !$OMP DEFAULT(SHARED) + + do kiter=1,Nabc + + !$OMP MASTER + do while ((imin <= Nabc).and.(sampled(imin)>-1_8)) + imin = imin+1 + enddo + + ! Deterministic part + if (imin < Nabc) then + ieta=imin + sampled(ieta) = 0_8 a = abc(1,ieta) b = abc(2,ieta) c = abc(3,ieta) - if (a/=c) then - memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) - else - memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & - V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & - W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) - endif - computed(ieta) = .True. Ncomputed += 1_8 - call wall_time(t01) - if (t01-t00 > 1.d0) then - t00 = t01 - print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 endif -! print *, memo(ieta), Pabc(ieta), memo(ieta) * Pabc(ieta) + !$OMP END TASK endif - sample = memo(ieta) * Pabc(ieta) - ET = ET + sample - ET2 = ET2 + sample*sample - average = ET/dble(isample) - variance = ET2/dble(isample) - average*average - converged = (Ncomputed >= (Nabc*90_8)/100_8) .or. (isample>=1000*Nabc) -! enddo - enddo - print *, average, dsqrt(variance/dble(isample)), real(Ncomputed)/real(Nabc), real(isample)/real(Nabc) - energy = average -! !$OMP PARALLEL & -! !$OMP PRIVATE(a,b,c,e) & -! !$OMP PRIVATE(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & -! !$OMP V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) & -! !$OMP DEFAULT(SHARED) -! allocate( W_abc(nO,nO,nO), W_cab(nO,nO,nO), W_bca(nO,nO,nO), & -! W_bac(nO,nO,nO), W_cba(nO,nO,nO), W_acb(nO,nO,nO), & -! V_abc(nO,nO,nO), V_cab(nO,nO,nO), V_bca(nO,nO,nO), & -! V_bac(nO,nO,nO), V_cba(nO,nO,nO), V_acb(nO,nO,nO) ) -! e = 0d0 -! !$OMP DO SCHEDULE(dynamic) -! do a = 1, nV -! do b = a+1, nV -! do c = b+1, nV -! e = e + ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov,V_abc, & -! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & -! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) -! enddo -! enddo -! -! do b = 1, nV -! if (b == a) cycle -! e = e + ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov,V_abc, & -! V_acb,V_bac,V_bca,V_cab,V_cba,W_abc,W_acb,W_bac, & -! W_bca,W_cab,W_cba,X_ooov,X_oovv,X_vovv,f_o,f_v) -! enddo -! enddo -! !$OMP END DO NOWAIT -! -! !$OMP CRITICAL -! energy = energy + e -! !$OMP END CRITICAL -! -! deallocate(W_abc, W_cab, W_bca, W_bac, W_cba, W_acb, & -! V_abc, V_cab, V_bca, V_bac, V_cba, V_acb ) -! -! !$OMP END PARALLEL + ! Stochastic part + call random_number(eta) + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + cycle + endif + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + + if (sampled(ieta) == -1_8) then + sampled(ieta) = 0_8 + a = abc(1,ieta) + b = abc(2,ieta) + c = abc(3,ieta) + Ncomputed += 1_8 + !$OMP TASK DEFAULT(SHARED) FIRSTPRIVATE(a,b,c,ieta) + if (a/=c) then + memo(ieta) = ccsd_t_task_abc(a,b,c,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + else + memo(ieta) = ccsd_t_task_aba(a,b,nO,nV,t1,T_oovv,T_voov, & + X_ooov,X_oovv,X_vovv,f_o,f_v) / 3.d0 + endif + !$OMP END TASK + endif + sampled(ieta) = sampled(ieta)+1_8 + + enddo + + call wall_time(t01) + if (t01-t00 > 1.0d0) then + t00 = t01 + + !$OMP TASKWAIT + + double precision :: ET, ET2 + double precision :: energy_stoch, energy_det + double precision :: scale + double precision :: w + double precision :: tmp + energy_stoch = 0.d0 + energy_det = 0.d0 + norm = 0.d0 + scale = 1.d0 + ET = 0.d0 + ET2 = 0.d0 + + + do isample=1,nbuckets + if (imin >= bounds(2,isample)) then + energy_det = energy_det + sum(memo(bounds(1,isample):bounds(2,isample))) + scale = scale - wsum(isample) + else + exit + endif + enddo + + do ieta=bounds(1,isample), Nabc + w = dble(max(sampled(ieta),0_8)) + tmp = w * memo(ieta) * Pabc(ieta) + ET = ET + tmp + ET2 = ET2 + tmp * memo(ieta) * Pabc(ieta) + norm = norm + w + enddo + norm = norm/scale + if (norm > 0.d0) then + energy_stoch = ET / norm + variance = ET2 / norm - energy_stoch*energy_stoch + endif + + energy = energy_det + energy_stoch + + print *, real(energy), ' +/- ', real(sqrt(variance/(norm-1.d0))), isample, real(Ncomputed)/real(Nabc) + endif + !$OMP END MASTER + if (imin >= Nabc) exit + enddo + + !$OMP END PARALLEL deallocate(X_vovv,X_ooov,T_voov,T_oovv) end -integer*8 function find_sample(v, w, n) - implicit none - BEGIN_DOC -! Finds sample v in weights w - END_DOC - integer*8, intent(in) :: n - double precision, intent(in) :: v, w(0:n) - integer*8 :: i,l,r - l=0 - r=n +integer*8 function binary_search(arr, key, size) + implicit none + BEGIN_DOC +! Searches the key in array arr(1:size) between l_in and r_in, and returns its index + END_DOC + integer*8 :: size, i, j, mid, l_in, r_in + double precision, dimension(size) :: arr(1:size) + double precision :: key - do while(r-l > 1) - i = shiftr(r+l,1) - if(w(i) < v) then - l = i - else - r = i - end if - end do - i = r - do r=i+1,n - if (w(r) /= w(i)) then - exit - endif - enddo - find_sample = r-1 -end function + i = 1_8 + j = size + + do while (j >= i) + mid = i + (j - i) / 2 + if (arr(mid) >= key) then + if (mid > 1 .and. arr(mid - 1) < key) then + binary_search = mid + return + end if + j = mid - 1 + else if (arr(mid) < key) then + i = mid + 1 + else + binary_search = mid + 1 + return + end if + end do + binary_search = i +end function binary_search From ee790fa1d82e94724cbc34cb5c4c802ca001d2b4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 16 May 2023 19:54:30 +0200 Subject: [PATCH 115/337] Formatted output in (T) --- src/ccsd/ccsd_space_orb_sub.irp.f | 289 +++++++++++++------------- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 15 +- 2 files changed, 158 insertions(+), 146 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 29ecca1c..287d5b03 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1,5 +1,5 @@ subroutine run_ccsd_space_orb - + implicit none integer :: i,j,k,l,a,b,c,d,tmp_a,tmp_b,tmp_c,tmp_d @@ -12,12 +12,12 @@ subroutine run_ccsd_space_orb double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) - + double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) - + PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) @@ -35,11 +35,11 @@ subroutine run_ccsd_space_orb if (cc_ref_is_open_shell) then call abort endif - + ! Number of occ/vir spatial orb nO = nOa nV = nVa - + allocate(list_occ(nO),list_vir(nV)) list_occ = cc_list_occ list_vir = cc_list_vir @@ -47,7 +47,7 @@ subroutine run_ccsd_space_orb !call extract_list_orb_space(det,nO,nV,list_occ,list_vir) !print*,'occ',list_occ !print*,'vir',list_vir - + allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) @@ -76,7 +76,7 @@ subroutine run_ccsd_space_orb print*,'Det energy', uncorr_energy call ccsd_energy_space(nO,nV,tau,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy - + nb_iter = 0 not_converged = .True. max_r1 = 0d0 @@ -86,9 +86,9 @@ subroutine run_ccsd_space_orb write(*,'(A77)') ' | It. | E(CCSD) (Ha) | Correlation (Ha) | Conv. T1 | Conv. T2 |' write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(ta) - + do while (not_converged) - + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) call compute_H_vv(nO,nV,t1,t2,tau,H_vv) call compute_H_vo(nO,nV,t1,t2,H_vo) @@ -97,7 +97,7 @@ subroutine run_ccsd_space_orb call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) max_r = max(max_r1,max_r2) - + ! Update if (cc_update_method == 'diis') then !call update_t_ccsd(nO,nV,nb_iter,f_o,f_v,r1,r2,t1,t2,all_err1,all_err2,all_t1,all_t2) @@ -122,7 +122,7 @@ subroutine run_ccsd_space_orb if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then not_converged = .False. endif - + enddo write(*,'(A77)') ' -----------------------------------------------------------------------------' call wall_time(tb) @@ -141,18 +141,18 @@ subroutine run_ccsd_space_orb call write_t1(nO,nV,t1) call write_t2(nO,nV,t2) - + ! Deallocation if (cc_update_method == 'diis') then deallocate(all_err,all_t) endif deallocate(H_vv,H_oo,H_vo,r1,r2,tau) - + ! CCSD(T) double precision :: e_t - if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then + if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then ! Dumb way !call wall_time(ta) @@ -171,8 +171,11 @@ subroutine run_ccsd_space_orb call wall_time(ta) ! call ccsd_par_t_space_v3(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ! ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + + e_t = uncorr_energy + energy ! For print in next call call ccsd_par_t_space_stoch(nO,nV,t1,t2,cc_space_f_o,cc_space_f_v & ,cc_space_v_vvvo,cc_space_v_vvoo,cc_space_v_vooo,e_t) + call wall_time(tb) print*,'Time: ',tb-ta, ' s' @@ -182,7 +185,7 @@ subroutine run_ccsd_space_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + e_t, ' Ha' print*,'' endif - + print*,'Reference determinant:' call print_det(det,N_int) @@ -234,7 +237,7 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) energy = energy + e !$omp end critical !$omp end parallel - + end ! Tau @@ -252,12 +255,12 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) ! internal integer :: i,j,a,b - + !$OMP PARALLEL & !$OMP SHARED(nO,nV,tau,t2,t1) & !$OMP PRIVATE(i,j,a,b) & !$OMP DEFAULT(NONE) - !$OMP DO + !$OMP DO do b = 1, nV do a = 1, nV do j = 1, nO @@ -269,7 +272,7 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) enddo !$OMP END DO !$OMP END PARALLEL - + end ! R1 @@ -285,7 +288,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! out double precision, intent(out) :: r1(nO,nV), max_r1 - + ! internal integer :: u,i,j,beta,a,b @@ -306,7 +309,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! cc_space_f_vo(a,i) * t1(i,beta) -> X1(nV,nV), O(nV*nV*nO) ! X1(a,beta) * t1(u,a) -> O(nO*nV*nV) ! cc_space_f_vo(a,i) * t1(u,a) -> X1(nO,nO), O(nO*nO*nV) - ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) + ! X1(i,u) * t1(i,beta) -> O(nO*nO*nV) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO @@ -326,7 +329,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call dgemm('T','N', nO, nV, nO, & 1d0, X_oo, size(X_oo,2), & t1 , size(t1,1), & - 1d0, r1 , size(r1,1)) + 1d0, r1 , size(r1,1)) deallocate(X_oo) ! r1(u,beta) = r1(u,beta) + H_vv(a,beta) * t1(u,a) @@ -375,7 +378,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,X_voov,t2,t1) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do i = 1, nO @@ -387,16 +390,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + call dgemv('T', nV*nO, nO*nV, & 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & H_vo , 1, & 1d0, r1 , 1) - + deallocate(X_voov) ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)) * t1(i,a) - ! <=> + ! <=> ! r1(u,beta) = r1(u,beta) + X(i,a,u,beta) !do beta = 1, nV ! do u = 1, nO @@ -414,7 +417,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & !$omp private(u,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nv @@ -431,17 +434,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & t1 , 1, & 1d0, r1 , 1) - + deallocate(X_ovov) - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) - ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + W(a,b,i,beta) * T(u,a,b,i) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do a = 1, nV ! do b = 1, nV - ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) + ! r1(u,beta) = r1(u,beta) + (2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta)) * tau(i,u,a,b) ! enddo ! enddo ! enddo @@ -454,24 +457,24 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) enddo enddo enddo enddo !$omp end do nowait - !$omp do + !$omp do do u = 1, nO do i = 1, nO do b = 1, nV do a = 1, nV - T_vvoo(a,b,i,u) = tau(i,u,a,b) + T_vvoo(a,b,i,u) = tau(i,u,a,b) enddo enddo enddo @@ -483,17 +486,17 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_vvov,T_vvoo) - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) - ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV ! do u = 1, nO ! do i = 1, nO ! do j = 1, nO ! do a = 1, nV - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! enddo ! enddo ! enddo @@ -507,7 +510,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp private(u,a,i,j) & !$omp default(none) do u = 1, nO - !$omp do + !$omp do do a = 1, nV do j = 1, nO do i = 1, nO @@ -523,7 +526,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & tau , size(tau,1) * size(tau,2) * size(tau,3), & 1d0, r1 , size(r1,1)) - + deallocate(W_oovo) max_r1 = 0d0 @@ -538,7 +541,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp shared(nO,nV,r1) & !$omp private(a,i) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do i = 1, nO r1(i,a) = -r1(i,a) @@ -546,7 +549,7 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo !$omp end do !$omp end parallel - + end ! H_oo @@ -578,7 +581,7 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -601,8 +604,8 @@ subroutine compute_H_oo(nO,nV,t1,t2,tau,H_oo) call dgemm('N','T', nO, nO, nO*nV*nV, & 1d0, tau , size(tau,1), & cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) - + 1d0, H_oo , size(H_oo,1)) + end ! H_vv @@ -633,7 +636,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! enddo ! enddo ! enddo - ! + ! ! enddo !enddo @@ -656,13 +659,13 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tau(i,j,beta,b) ! H_vv(a,beta) = H_vv(a,beta) - cc_space_w_vvoo(a,b,i,j) * tmp_tau(b,i,j,beta) - - !$omp do + + !$omp do do beta = 1, nV do j = 1, nO do i = 1, nO do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) enddo enddo enddo @@ -676,7 +679,7 @@ subroutine compute_H_vv(nO,nV,t1,t2,tau,H_vv) 1d0, H_vv , size(H_vv,1)) deallocate(tmp_tau) - + end ! H_vo @@ -704,7 +707,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! enddo ! enddo - ! + ! ! enddo !enddo @@ -727,7 +730,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) - !$omp do + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -746,7 +749,7 @@ subroutine compute_H_vo(nO,nV,t1,t2,H_vo) 1d0, H_vo, 1) deallocate(w) - + end ! R2 @@ -771,7 +774,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(g_occ(nO,nO), g_vir(nV,nV)) allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) allocate(A1(nO,nO,nO,nO)) - + call compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) call compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) call compute_A1(nO,nV,t1,t2,tau,A1) @@ -787,7 +790,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,cc_space_v_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -863,7 +866,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & !$omp default(none) - !$omp do + !$omp do do a = 1, nV do gam = 1, nV do v = 1, nO @@ -875,7 +878,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & @@ -885,7 +888,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -921,7 +924,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -957,7 +960,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & !$omp private(u,a,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do u = 1, nO @@ -979,7 +982,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,Y_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -991,7 +994,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1009,13 +1012,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & !$omp private(u,v,gam,i) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do u = 1, nO do a = 1, nV @@ -1036,12 +1039,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, t1, size(t1,1), & Y_vovv, size(Y_vovv,1), & 0d0, X_oovv, size(X_oovv,1)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1055,7 +1058,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_vovo,Y_vovv) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1079,7 +1082,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1092,7 +1095,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1111,13 +1114,13 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & !$omp private(a,v,gam,i) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1138,12 +1141,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) - + !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1155,7 +1158,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_vovo,Y_oovo) !do gam = 1, nV @@ -1183,7 +1186,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp private(u,v,gam,beta,i,a) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do a = 1, nV do beta = 1, nV do u = 1, nO @@ -1194,7 +1197,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do nowait enddo - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do i = 1, nO @@ -1206,17 +1209,17 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1228,9 +1231,9 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + deallocate(X_ovvo,Y_voov) - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1252,7 +1255,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do u = 1, nO do a = 1, nV @@ -1264,7 +1267,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait - !$omp do + !$omp do do gam = 1, nV do v = 1, nO do a = 1, nV @@ -1281,12 +1284,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do v = 1, nO @@ -1298,7 +1301,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + !do gam = 1, nV ! do beta = 1, nV ! do v = 1, nO @@ -1343,12 +1346,12 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) - + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -1367,7 +1370,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end parallel deallocate(X_ovov,Y_ovov,Z_ovov) - + ! Change the sign for consistency with the code in spin orbitals !$omp parallel & !$omp shared(nO,nV,r2) & @@ -1385,7 +1388,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel - + max_r2 = 0d0 do b = 1, nV do a = 1, nV @@ -1398,7 +1401,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo deallocate(g_occ,g_vir,J1,K1,A1) - + end ! A1 @@ -1427,12 +1430,12 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) ! A1(u,v,i,j) = A1(u,v,i,j) & ! + cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! + cc_space_v_vooo(a,v,i,j) * t1(u,a) - ! + ! ! do b = 1, nV ! A1(u,v,i,j) = A1(u,v,i,j) + cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) - ! enddo + ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1440,7 +1443,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) - + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) !$omp parallel & !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & @@ -1494,7 +1497,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) enddo !$omp end do !$omp end parallel - + deallocate(X_vooo,Y_oooo) ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) @@ -1508,7 +1511,7 @@ subroutine compute_A1(nO,nV,t1,t2,tau,A1) 1d0, tau , size(tau,1) * size(tau,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & 1d0, A1 , size(A1,1) * size(A1,2)) - + end ! B1 @@ -1530,28 +1533,28 @@ subroutine compute_B1(nO,nV,t1,t2,B1) ! do beta = 1, nV ! do b = 1, nV ! do a = 1, nV - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) ! do i = 1, nO ! B1(a,b,beta,gam) = B1(a,b,beta,gam) & ! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & ! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo - + double precision, allocatable :: X_vvvo(:,:,:,:), Y_vvvv(:,:,:,:) allocate(X_vvvo(nV,nV,nV,nO), Y_vvvv(nV,nV,nV,nV)) - ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1563,7 +1566,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do nowait do i = 1, nO - !$omp do + !$omp do do gam = 1, nV do b = 1, nV do a = 1, nV @@ -1574,14 +1577,14 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp end do nowait enddo !$omp end parallel - + ! B1(a,b,beta,gam) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & t1 , size(t1,1), & 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) - + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) call dgemm('N','N', nV*nV*nV, nV, nO, & -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2) * size(X_vvvo,3), & @@ -1592,7 +1595,7 @@ subroutine compute_B1(nO,nV,t1,t2,B1) !$omp shared(nV,B1,Y_vvvv) & !$omp private(a,b,beta,gam) & !$omp default(none) - !$omp do + !$omp do do gam = 1, nV do beta = 1, nV do b = 1, nV @@ -1604,9 +1607,9 @@ subroutine compute_B1(nO,nV,t1,t2,B1) enddo !$omp end do !$omp end parallel - + deallocate(X_vvvo,Y_vvvv) - + end ! g_occ @@ -1627,14 +1630,14 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) !do i = 1, nO ! do u = 1, nO ! g_occ(u,i) = H_oo(u,i) - ! + ! ! do a = 1, nV ! g_occ(u,i) = g_occ(u,i) + cc_space_f_vo(a,i) * t1(u,a) - ! + ! ! do j = 1, nO ! g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) ! enddo - ! + ! ! enddo ! enddo !enddo @@ -1655,8 +1658,8 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo enddo !$omp end do - - !$omp do + + !$omp do do i = 1, nO do j = 1, nO do a = 1, nV @@ -1668,7 +1671,7 @@ subroutine compute_g_occ(nO,nV,t1,t2,H_oo,g_occ) enddo !$omp end do !$omp end parallel - + end ! g_vir @@ -1689,23 +1692,23 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) !do beta = 1, nV ! do a = 1, nV ! g_vir(a,beta) = H_vv(a,beta) - ! + ! ! do i = 1, nO ! g_vir(a,beta) = g_vir(a,beta) - cc_space_f_vo(a,i) * t1(i,beta) - ! + ! ! do b = 1, nV ! g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) ! enddo - ! + ! ! enddo ! enddo !enddo - + call dgemm('N','N',nV,nV,nO, & -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - + !$omp parallel & !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & !$omp private(i,b,a,beta) & @@ -1718,7 +1721,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do b = 1, nV @@ -1730,7 +1733,7 @@ subroutine compute_g_vir(nO,nV,t1,t2,H_vv,g_vir) enddo !$omp end do !$omp end parallel - + end ! J1 @@ -1763,7 +1766,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! do b = 1, nV ! J1(u,a,beta,i) = J1(u,a,beta,i) & - ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) + ! + cc_space_v_vvvo(b,a,beta,i) * t1(u,b) ! enddo ! do j = 1, nO @@ -1773,7 +1776,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) ! + 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo @@ -1781,13 +1784,13 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) - + !$omp parallel & !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & !$omp private(i,j,a,u,beta) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1810,7 +1813,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do !$omp end parallel - + call dgemm('N','N',nO*nV*nO,nV,nO, & -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & t1 , size(t1,1), & @@ -1821,7 +1824,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp private(i,beta,a,u) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1847,7 +1850,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & !$omp private(i,beta,a,u,b,j) & !$omp default(none) - !$omp do + !$omp do do b = 1, nV do j = 1, nO do beta = 1, nV @@ -1859,7 +1862,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait - !$omp do + !$omp do do b = 1, nV do j = 1, nO do i = 1, nO @@ -1885,7 +1888,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp private(i,beta,a,u,j,b) & !$omp default(none) do i = 1, nO - !$omp do + !$omp do do beta = 1, nV do a = 1, nV do u = 1, nO @@ -1895,10 +1898,10 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait enddo - + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) do j = 1, nO - !$omp do + !$omp do do b = 1, nV do i = 1, nO do a = 1, nV @@ -1908,9 +1911,9 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end do nowait enddo - + do j = 1, nO - !$omp do + !$omp do do b = 1, nV do beta = 1, nV do u = 1, nO @@ -1921,7 +1924,7 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp end do nowait enddo !$omp end parallel - + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & @@ -1944,8 +1947,8 @@ subroutine compute_J1(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) enddo !$omp end parallel - deallocate(X_ovvo,Z_ovvo,Y_ovov) - + deallocate(X_ovvo,Z_ovvo,Y_ovov) + end ! K1 @@ -1980,7 +1983,7 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! do b = 1, nV ! K1(u,a,i,beta) = K1(u,a,i,beta) & - ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) + ! + cc_space_v_vvov(b,a,i,beta) * t1(u,b) ! enddo ! do j = 1, nO @@ -1989,19 +1992,19 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) ! - cc_space_v_vvoo(b,a,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) ! enddo ! enddo - ! + ! ! enddo ! enddo ! enddo !enddo allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) - + !$omp parallel & !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & !$omp private(i,beta,a,u,j,b) & !$omp default(none) - !$omp do + !$omp do do beta = 1, nV do i = 1, nO do a = 1, nV @@ -2072,5 +2075,5 @@ subroutine compute_K1(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end parallel deallocate(X,Y,Z) - + end diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 0081e9e7..049c57e8 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -7,13 +7,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ double precision, intent(in) :: t1(nO,nV), f_o(nO), f_v(nV) double precision, intent(in) :: t2(nO,nO,nV,nV) double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO), v_vooo(nV,nO,nO,nO) - double precision, intent(out) :: energy + double precision, intent(inout) :: energy double precision, allocatable :: X_vovv(:,:,:,:), X_ooov(:,:,:,:), X_oovv(:,:,:,:) double precision, allocatable :: T_voov(:,:,:,:), T_oovv(:,:,:,:) integer :: i,j,k,l,a,b,c,d - double precision :: e,ta,tb + double precision :: e,ta,tb,eccsd + eccsd = energy call set_multiple_levels_omp(.False.) allocate(X_vovv(nV,nO,nV,nV), X_ooov(nO,nO,nO,nV), X_oovv(nO,nO,nV,nV)) @@ -206,6 +207,12 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) + print '(A)', '' + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' | E(CCSD(T)) | Error | % |' + print '(A)', ' +----------------------+--------------+----------+' + + call wall_time(t00) imin = 1_8 !$OMP PARALLEL & @@ -309,13 +316,15 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print *, real(energy), ' +/- ', real(sqrt(variance/(norm-1.d0))), isample, real(Ncomputed)/real(Nabc) + print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL + print '(A)', ' +----------------------+--------------+----------+' + print '(A)', '' deallocate(X_vovv,X_ooov,T_voov,T_oovv) end From ae227aac33290e8689f8543d8698afda8af563a5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 17 May 2023 01:21:22 +0200 Subject: [PATCH 116/337] todo: multiply F_TC by 0.5 --- src/tc_scf/rh_tcscf_diis.irp.f | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 5901911c..61355498 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -1,5 +1,10 @@ ! --- +! TODO +! level shift of SCF is well adapted +! for 0.5 x F +! + subroutine rh_tcscf_diis() implicit none From 46cbd80b9596a6e2c19e2db13ea800376c8cfb55 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 10:44:32 +0200 Subject: [PATCH 117/337] Accelerated Cholesky --- src/ao_two_e_ints/EZFIO.cfg | 6 +++ src/ao_two_e_ints/cholesky.irp.f | 81 ++++++++++++++++--------------- src/ccsd/ccsd_space_orb_sub.irp.f | 10 ++-- 3 files changed, 51 insertions(+), 46 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 4ab080ec..9f523fca 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -11,6 +11,12 @@ interface: ezfio,provider,ocaml default: 1.e-15 ezfio_name: threshold_ao +[ao_cholesky_threshold] +type: Threshold +doc: If | (ii|jj) | < `ao_cholesky_threshold` then (ii|jj) is zero +interface: ezfio,provider,ocaml +default: 1.e-12 + [do_direct_integrals] type: logical doc: Compute integrals on the fly (very slow, only for debugging) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..3da827e1 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,29 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - integer :: i,j,k,l - double precision :: xnorm0, x, integral - double precision, external :: ao_two_e_integral - - cholesky_ao_num_guess = 0 - xnorm0 = 0.d0 - x = 0.d0 - do j=1,ao_num - do i=1,ao_num - integral = ao_two_e_integral(i,i,j,j) - if (integral > ao_integrals_threshold) then - cholesky_ao_num_guess += 1 - else - x += integral - endif - enddo - enddo - print *, 'Cholesky decomposition of AO integrals' - print *, '--------------------------------------' - print *, '' - print *, 'Estimated Error: ', x - print *, 'Guess size: ', cholesky_ao_num_guess, '(', 100.d0*dble(cholesky_ao_num_guess)/dble(ao_num*ao_num), ' %)' - + cholesky_ao_num_guess = ao_num*ao_num / 2 END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -39,7 +17,7 @@ END_PROVIDER END_DOC type(c_ptr) :: ptr - integer :: fd, i,j,k,l, rank + integer :: fd, i,j,k,l,m,rank double precision, pointer :: ao_integrals(:,:,:,:) double precision, external :: ao_two_e_integral @@ -49,24 +27,49 @@ END_PROVIDER 8, fd, .False., ptr) call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - double precision :: integral + print*, 'Providing the AO integrals (Cholesky)' + call wall_time(wall_1) + call cpu_time(cpu_1) + + ao_integrals = 0.d0 + + double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) - do l=1,ao_num - do j=1,l - do k=1,ao_num - do i=1,k - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - enddo + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT enddo - enddo + !$OMP MASTER + call wall_time(wall_2) + print '(F10.2,'' % in'', 4X, I10, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 287d5b03..2e0ccd8f 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -16,7 +16,7 @@ subroutine run_ccsd_space_orb double precision, allocatable :: all_err(:,:), all_t(:,:) integer, allocatable :: list_occ(:), list_vir(:) integer(bit_kind) :: det(N_int,2) - integer :: nO, nV, nOa, nOb, nVa, nVb, n_spin(4) + integer :: nO, nV, nOa, nVa PROVIDE mo_two_e_integrals_in_map @@ -24,12 +24,8 @@ subroutine run_ccsd_space_orb print*,'Reference determinant:' call print_det(det,N_int) - ! Extract number of occ/vir alpha/beta spin orbitals - !call extract_n_spin(det,n_spin) - nOa = cc_nOa !n_spin(1) - nOb = cc_nOb !n_spin(2) - nVa = cc_nVa !n_spin(3) - nVb = cc_nVb !n_spin(4) + nOa = cc_nOa + nVa = cc_nVa ! Check that the reference is a closed shell determinant if (cc_ref_is_open_shell) then From a8948d091667801acb7800a7510e45b136b59fd3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 16:55:29 +0200 Subject: [PATCH 118/337] cholesky in big_array --- src/ao_two_e_ints/cholesky.irp.f | 97 +++++++---- src/ccsd/ccsd_space_orb_sub.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 2 +- src/mo_two_e_ints/cholesky.irp.f | 30 ++++ src/mo_two_e_ints/integrals_3_index.irp.f | 70 ++++++-- src/mo_two_e_ints/mo_bi_integrals.irp.f | 27 ++- src/utils_cc/energy.irp.f | 5 +- src/utils_cc/mo_integrals_cc.irp.f | 197 +++++++++++----------- 8 files changed, 281 insertions(+), 149 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 3da827e1..bb81b141 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -35,45 +35,82 @@ END_PROVIDER double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 logical, external :: ao_two_e_integral_zero + double precision, external :: get_ao_two_e_integral - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral + if (read_ao_two_e_integrals) then + PROVIDE ao_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo enddo enddo + !$OMP END DO NOWAIT enddo - !$OMP END DO NOWAIT + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER enddo - !$OMP MASTER - call wall_time(wall_2) - print '(F10.2,'' % in'', 4X, I10, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + else + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) + do m=0,9 + do l=1+m,ao_num,10 + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,min(k,j) + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + ao_integrals(j,l,i,k) = integral + ao_integrals(j,l,k,i) = integral + ao_integrals(l,j,i,k) = integral + ao_integrals(l,j,k,i) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP MASTER + call wall_time(wall_2) + print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 + !$OMP END MASTER + enddo + !$OMP END PARALLEL + + call wall_time(wall_2) + call cpu_time(cpu_2) + print*, 'AO integrals provided:' + print*, ' cpu time :',cpu_2 - cpu_1, 's' + print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' + + endif ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_integrals_threshold, ao_num*ao_num, cholesky_ao) + call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 2e0ccd8f..256117d6 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,7 +18,7 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa - PROVIDE mo_two_e_integrals_in_map +! PROVIDE mo_two_e_integrals_in_map det = psi_det(:,:,cc_ref) print*,'Reference determinant:' diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 049c57e8..1f3bebc2 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -274,7 +274,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo call wall_time(t01) - if (t01-t00 > 1.0d0) then + if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then t00 = t01 !$OMP TASKWAIT diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..b5b39b3b 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,11 +6,41 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) enddo !$OMP END PARALLEL DO + print *, '' + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + integer :: i,j,k + double precision, allocatable :: buffer(:,:) + + print *, 'AO->MO Transformation of Cholesky vectors .' + !$OMP PARALLEL PRIVATE(i,j,k,buffer) + allocate(buffer(mo_num,mo_num)) + !$OMP DO SCHEDULE(static) + do k=1,cholesky_ao_num + call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo_transp(k,i,j) = buffer(i,j) + enddo + enddo + enddo + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + print *, '' END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index 4ffb0134..d807f619 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -4,24 +4,68 @@ BEGIN_DOC ! big_array_coulomb_integrals(j,i,k) = = (ik|jj) ! - ! big_array_exchange_integrals(i,j,k) = = (ij|kj) + ! big_array_exchange_integrals(j,i,k) = = (ij|kj) END_DOC - integer :: i,j,k,l + integer :: i,j,k,l,a double precision :: get_two_e_integral double precision :: integral - do k = 1, mo_num - do i = 1, mo_num - do j = 1, mo_num - l = j - integral = get_two_e_integral(i,j,k,l,mo_integrals_map) - big_array_coulomb_integrals(j,i,k) = integral - l = j - integral = get_two_e_integral(i,j,l,k,mo_integrals_map) - big_array_exchange_integrals(j,i,k) = integral + if (do_ao_cholesky) then + + double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) + allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + do j=1,mo_num + buffer_jj(:,j) = cholesky_mo_transp(:,j,j) + enddo + + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + buffer_jj, cholesky_ao_num, 0.d0, & + buffer, mo_num*mo_num) + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + big_array_coulomb_integrals(j,i,k) = buffer(i,k,j) + enddo + enddo + enddo + deallocate(buffer_jj) + + allocate(buffer_jj(mo_num,mo_num)) + + do j = 1, mo_num + + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, & + cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + buffer_jj, mo_num) + + do k=1,mo_num + do i=1,mo_num + big_array_exchange_integrals(j,i,k) = buffer_jj(i,k) + enddo + enddo + enddo + + deallocate(buffer_jj) + + else + + do k = 1, mo_num + do i = 1, mo_num + do j = 1, mo_num + l = j + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + big_array_coulomb_integrals(j,i,k) = integral + l = j + integral = get_two_e_integral(i,j,l,k,mo_integrals_map) + big_array_exchange_integrals(j,i,k) = integral + enddo + enddo enddo - enddo - enddo + + endif END_PROVIDER diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b7ef901d..a461504e 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -1353,15 +1353,30 @@ END_PROVIDER integer :: i,j double precision :: get_two_e_integral - PROVIDE mo_two_e_integrals_in_map - mo_two_e_integrals_jj = 0.d0 - mo_two_e_integrals_jj_exchange = 0.d0 + + if (do_ao_cholesky) then + do j=1,mo_num + do i=1,mo_num + !TODO: use dgemm + mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) + mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + enddo + enddo + + else + + do j=1,mo_num + do i=1,mo_num + mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) + mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) + enddo + enddo + + endif do j=1,mo_num do i=1,mo_num - mo_two_e_integrals_jj(i,j) = get_two_e_integral(i,j,i,j,mo_integrals_map) - mo_two_e_integrals_jj_exchange(i,j) = get_two_e_integral(i,j,j,i,mo_integrals_map) - mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) + mo_two_e_integrals_jj_anti(i,j) = mo_two_e_integrals_jj(i,j) - mo_two_e_integrals_jj_exchange(i,j) enddo enddo diff --git a/src/utils_cc/energy.irp.f b/src/utils_cc/energy.irp.f index 33e0cbae..fc1451ba 100644 --- a/src/utils_cc/energy.irp.f +++ b/src/utils_cc/energy.irp.f @@ -5,9 +5,8 @@ subroutine det_energy(det,energy) integer(bit_kind), intent(in) :: det double precision, intent(out) :: energy + double precision, external :: diag_H_mat_elem - call i_H_j(det,det,N_int,energy) + energy = diag_H_mat_elem(det,N_int) + nuclear_repulsion - energy = energy + nuclear_repulsion - end diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 9e244d82..485d7002 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -13,7 +13,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) integer :: i1,i2,idx1,idx2 allocate(tmp_F(mo_num,mo_num)) - + call get_fock_matrix_spin(det,1,tmp_F) !$OMP PARALLEL & @@ -32,7 +32,7 @@ subroutine gen_f_space(det,n1,n2,list1,list2,f) !$OMP END PARALLEL deallocate(tmp_F) - + end ! V @@ -45,63 +45,66 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer, intent(in) :: list1(n1),list2(n2),list3(n3),list4(n4) double precision, intent(out) :: v(n1,n2,n3,n4) - integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4 - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map + integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& !$OMP DEFAULT(NONE) - !$OMP DO collapse(3) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO do i4 = 1, n4 - do i3 = 1, n3 - do i2 = 1, n2 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) do i1 = 1, n1 - idx4 = list4(i4) - idx3 = list3(i3) - idx2 = list2(i2) idx1 = list1(i1) - v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + + end ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] - implicit none - - integer :: i,j,k,l - double precision :: get_two_e_integral - - PROVIDE mo_two_e_integrals_in_map - + integer :: i1,i2,i3,i4,k + double precision, allocatable :: buffer(:,:,:) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & - !$OMP PRIVATE(i,j,k,l) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) - - !$OMP DO collapse(3) - do l = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do i = 1, mo_num - cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) enddo enddo enddo enddo !$OMP END DO + deallocate(buffer) !$OMP END PARALLEL - + END_PROVIDER ! oooo @@ -280,7 +283,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] allocate(tmp_v(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo)) call gen_v_space(cc_n_mo,cc_n_mo,cc_n_mo,cc_n_mo, cc_list_gen,cc_list_gen,cc_list_gen,cc_list_gen, tmp_v) - + do q = 1, cc_n_mo do p = 1, cc_n_mo cc_space_v_ppqq(p,q) = tmp_v(p,p,q,q) @@ -382,7 +385,7 @@ BEGIN_PROVIDER [double precision, cc_space_v_aabb, (cc_nVa,cc_nVa)] enddo FREE cc_space_v_vvvv - + END_PROVIDER ! iaia @@ -467,7 +470,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nOa,cc_nOa,cc_nVa,cc_nVa)) - + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, tmp_v) !$OMP PARALLEL & @@ -501,7 +504,7 @@ BEGIN_PROVIDER [double precision, cc_space_w_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n integer :: i,j,a,b allocate(tmp_v(cc_nVa,cc_nVa,cc_nOa,cc_nOa)) - + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, tmp_v) !$OMP PARALLEL & @@ -613,7 +616,7 @@ subroutine shift_idx_spin(s,n_S,shift) else shift = n_S(1) endif - + end ! F @@ -626,21 +629,22 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) ! Compute the Fock matrix corresponding to two lists of spin orbitals. ! Ex: occ/occ, occ/vir,... END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: n1,n2, n1_S(2), n2_S(2) integer, intent(in) :: list1(n1,2), list2(n2,2) integer, intent(in) :: dim1, dim2 - + double precision, intent(out) :: f(dim1, dim2) double precision, allocatable :: tmp_F(:,:) integer :: i,j, idx_i,idx_j,i_shift,j_shift integer :: tmp_i,tmp_j integer :: si,sj,s + PROVIDE big_array_exchange_integrals big_array_coulomb_integrals allocate(tmp_F(mo_num,mo_num)) - + do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 @@ -669,9 +673,9 @@ subroutine gen_f_spin(det, n1,n2, n1_S,n2_S, list1,list2, dim1,dim2, f) enddo enddo - + deallocate(tmp_F) - + end ! Get F @@ -683,12 +687,12 @@ subroutine get_fock_matrix_spin(det,s,f) BEGIN_DOC ! Fock matrix alpha or beta of an arbitrary det END_DOC - + integer(bit_kind), intent(in) :: det(N_int,2) integer, intent(in) :: s - + double precision, intent(out) :: f(mo_num,mo_num) - + integer :: p,q,i,s1,s2 integer(bit_kind) :: res(N_int,2) logical :: ok @@ -701,9 +705,11 @@ subroutine get_fock_matrix_spin(det,s,f) s1 = 2 s2 = 1 endif - + + PROVIDE big_array_coulomb_integrals big_array_exchange_integrals + !$OMP PARALLEL & - !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals) & + !$OMP SHARED(f,mo_num,s1,s2,N_int,det,mo_one_e_integrals,big_array_coulomb_integrals,big_array_exchange_integrals) & !$OMP PRIVATE(p,q,ok,i,res)& !$OMP DEFAULT(NONE) !$OMP DO collapse(1) @@ -713,20 +719,21 @@ subroutine get_fock_matrix_spin(det,s,f) do i = 1, mo_num call apply_hole(det, s1, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) +! f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) - mo_two_e_integral(p,i,i,q) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) - big_array_exchange_integrals(i,p,q) endif enddo do i = 1, mo_num call apply_hole(det, s2, i, res, ok, N_int) if (ok) then - f(p,q) = f(p,q) + mo_two_e_integral(p,i,q,i) + f(p,q) = f(p,q) + big_array_coulomb_integrals(i,p,q) endif enddo enddo enddo !$OMP END DO !$OMP END PARALLEL - + end ! V @@ -752,14 +759,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, integer :: si,sj,sk,sl,s PROVIDE cc_space_v - + !$OMP PARALLEL & !$OMP SHARED(cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v) & !$OMP PRIVATE(s,si,sj,sk,sl,i_shift,j_shift,k_shift,l_shift, & !$OMP i,j,k,l,idx_i,idx_j,idx_k,idx_l,& !$OMP tmp_i,tmp_j,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 @@ -768,7 +775,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then @@ -776,7 +783,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -792,14 +799,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -815,14 +822,14 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(3) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -843,7 +850,7 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -859,13 +866,13 @@ subroutine gen_v_spin(n1,n2,n3,n4, n1_S,n2_S,n3_S,n4_S, list1,list2,list3,list4, enddo !$OMP END DO endif - + enddo enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx @@ -900,28 +907,28 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, call shift_idx_spin(sl,n4_S,l_shift) tmp_l = idx_l - l_shift l = list4(tmp_l,sl) - + !$OMP PARALLEL & !$OMP SHARED(l,sl,idx_l,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_l) & !$OMP PRIVATE(s,si,sj,sk,i_shift,j_shift,k_shift, & !$OMP i,j,k,idx_i,idx_j,idx_k,& !$OMP tmp_i,tmp_j,tmp_k)& !$OMP DEFAULT(NONE) - + do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -934,13 +941,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -953,13 +960,13 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -976,7 +983,7 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, !$OMP DO collapse(2) do tmp_k = 1, n3_S(sk) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) k = list3(tmp_k,sk) idx_k = tmp_k + k_shift j = list2(tmp_j,sj) @@ -989,12 +996,12 @@ subroutine gen_v_spin_3idx(n1,n2,n3,n4, idx_l, n1_S,n2_S,n3_S,n4_S, list1,list2, enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_ij_l @@ -1029,28 +1036,28 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sk,n3_S,k_shift) tmp_k = idx_k - k_shift k = list3(tmp_k,sk) - + !$OMP PARALLEL & !$OMP SHARED(k,sk,idx_k,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_k) & !$OMP PRIVATE(s,si,sj,sl,i_shift,j_shift,l_shift, & !$OMP i,j,l,idx_i,idx_j,idx_l,& !$OMP tmp_i,tmp_j,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sj = 1, 2 call shift_idx_spin(sj,n2_S,j_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1063,13 +1070,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1082,13 +1089,13 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1105,7 +1112,7 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_j = 1, n2_S(sj) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift j = list2(tmp_j,sj) @@ -1118,12 +1125,12 @@ subroutine gen_v_spin_3idx_ij_l(n1,n2,n3,n4, idx_k, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end ! V_3idx_i_kl @@ -1158,28 +1165,28 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l call shift_idx_spin(sj,n2_S,j_shift) tmp_j = idx_j - j_shift j = list2(tmp_j,sj) - + !$OMP PARALLEL & !$OMP SHARED(j,sj,idx_j,cc_space_v,n1_S,n2_S,n3_S,n4_S,list1,list2,list3,list4,v_j) & !$OMP PRIVATE(s,si,sk,sl,i_shift,l_shift,k_shift, & !$OMP i,k,l,idx_i,idx_k,idx_l,& !$OMP tmp_i,tmp_k,tmp_l)& !$OMP DEFAULT(NONE) - + do sl = 1, 2 call shift_idx_spin(sl,n4_S,l_shift) do sk = 1, 2 call shift_idx_spin(sk,n3_S,k_shift) do si = 1, 2 call shift_idx_spin(si,n1_S,i_shift) - + s = si+sj+sk+sl ! or if (s == 4 .or. s == 8) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1192,13 +1199,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sk .and. sj == sl) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1211,13 +1218,13 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo enddo !$OMP END DO - + ! or elseif (si == sl .and. sj == sk) then !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1234,7 +1241,7 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l !$OMP DO collapse(2) do tmp_l = 1, n4_S(sl) do tmp_k = 1, n3_S(sk) - do tmp_i = 1, n1_S(si) + do tmp_i = 1, n1_S(si) l = list4(tmp_l,sl) idx_l = tmp_l + l_shift k = list3(tmp_k,sk) @@ -1247,10 +1254,10 @@ subroutine gen_v_spin_3idx_i_kl(n1,n2,n3,n4, idx_j, n1_S,n2_S,n3_S,n4_S, list1,l enddo !$OMP END DO endif - + enddo enddo enddo !$OMP END PARALLEL - + end From 5817bbf573c5074ae4c31562cb03c47c69e148f7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 17 May 2023 17:50:35 +0200 Subject: [PATCH 119/337] Reduced memory in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 105 +++++++++++++++++++++++++++--- 1 file changed, 97 insertions(+), 8 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 256117d6..1467d9a4 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -764,7 +764,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! internal double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:), B1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:), B1_gam(:,:,:) integer :: u,v,i,j,beta,gam,a,b allocate(g_occ(nO,nO), g_vir(nV,nV)) @@ -834,13 +834,18 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! enddo !enddo - allocate(B1(nV,nV,nV,nV)) - call compute_B1(nO,nV,t1,t2,B1) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & - 1d0, tau, size(tau,1) * size(tau,2), & - B1 , size(B1,1) * size(B1,2), & - 1d0, r2, size(r2,1) * size(r2,2)) - deallocate(B1) +! allocate(B1(nV,nV,nV,nV)) +! call compute_B1(nO,nV,t1,t2,B1) + allocate(B1_gam(nV,nV,nV)) + do gam=1,nV + call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) + call dgemm('N','N',nO*nO,nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1_gam , size(B1_gam,1) * size(B1_gam,2), & + 1d0, r2(1,1,1,gam), size(r2,1) * size(r2,2)) + enddo + deallocate(B1_gam) + !do gam = 1, nV ! do beta = 1, nV @@ -1512,6 +1517,90 @@ end ! B1 +subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) + + implicit none + + integer, intent(in) :: nO,nV,gam + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: B1(nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! +! do i = 1, nO +! B1(a,b,beta) = B1(a,b,beta) & +! - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & +! - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) +! enddo +! +! enddo +! enddo +! enddo + + double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) +! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + !$omp parallel & + !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) + enddo + enddo + enddo + !$omp end do nowait + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + +! ! B1(a,b,beta) -= cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + call dgemm('N','N', nV*nV*nV, 1, nO, & + -1d0, cc_space_v_vvvo, size(cc_space_v_vvvo,1) * size(cc_space_v_vvvo,2) * size(cc_space_v_vvvo,3), & + t1(1,gam), size(t1,1), & + 1d0, B1 , size(B1,1) * size(B1,2) * size(B1,3)) + + ! B1(a,b,beta,gam) -= cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + call dgemm('N','N', nV*nV, nV, nO, & + -1d0, X_vvvo, size(X_vvvo,1) * size(X_vvvo,2), & + t1 , size(t1,1), & + 0d0, Y_vvvv, size(Y_vvvv,1) * size(Y_vvvv,2)) + + !$omp parallel & + !$omp shared(nV,B1,Y_vvvv,gam) & + !$omp private(a,b,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta) = B1(a,b,beta) + Y_vvvv(a,b,beta) + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vvvo,Y_vvvv) + +end + subroutine compute_B1(nO,nV,t1,t2,B1) implicit none From e3d8e28e23a7c6b19cb52b0aa8b2af0739207f19 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 18 May 2023 20:57:55 +0200 Subject: [PATCH 120/337] TC-SCF: no rotations for good angles --- src/tc_scf/rh_tcscf_diis.irp.f | 2 +- src/tc_scf/rh_tcscf_simple.irp.f | 2 +- src/tc_scf/routines_rotates.irp.f | 12 ++++++++---- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 61355498..20260a95 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -236,7 +236,7 @@ subroutine rh_tcscf_diis() ! --- print *, ' TCSCF DIIS converged !' - !call print_energy_and_mos() + !call print_energy_and_mos(good_angles) call write_time(6) deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f index 30798e3d..0b79e8ea 100644 --- a/src/tc_scf/rh_tcscf_simple.irp.f +++ b/src/tc_scf/rh_tcscf_simple.irp.f @@ -119,7 +119,7 @@ subroutine rh_tcscf_simple() endif print *, ' TCSCF Simple converged !' - call print_energy_and_mos() + !call print_energy_and_mos(good_angles) deallocate(rho_old, rho_new) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 4ac66b5f..755c35b9 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -59,7 +59,7 @@ subroutine minimize_tc_orb_angles() good_angles = .False. thr_deg = thr_degen_tc - call print_energy_and_mos() + call print_energy_and_mos(good_angles) print *, ' Minimizing the angles between the TC orbitals' i = 1 @@ -78,7 +78,7 @@ subroutine minimize_tc_orb_angles() print *, ' Converged ANGLES MINIMIZATION !!' call print_angles_tc() - call print_energy_and_mos() + call print_energy_and_mos(good_angles) end @@ -386,10 +386,11 @@ end ! --- -subroutine print_energy_and_mos() +subroutine print_energy_and_mos(good_angles) implicit none - integer :: i + logical, intent(out) :: good_angles + integer :: i print *, ' ' print *, ' TC energy = ', TC_HF_energy @@ -398,10 +399,13 @@ subroutine print_energy_and_mos() if(max_angle_left_right .lt. 45.d0) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' + good_angles = .true. else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' + good_angles = .false. endif print *, ' Diag Fock elem, product of left/right norm, angle left/right ' From 5240c6e1cb988bfbfcde75d62b0942ed7bb91188 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 19 May 2023 11:33:39 +0200 Subject: [PATCH 121/337] added comments in normal_ordered.irp. --- src/tc_bi_ortho/normal_ordered.irp.f | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 81f5fb2c..8adc7a63 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -48,10 +48,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ h2 = list_act(hh2) do pp2 = 1, n_act_orb p2 = list_act(pp2) - ! opposite spin double excitations + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) - ! same spin double excitations with opposite spin contributions + + ! same spin double excitations : s1 == s2 if(h1h2 ! same spin double excitations with same spin contributions if(Ne(2).ge.3)then @@ -60,8 +66,10 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ hthree_aaa = 0.d0 endif else + ! with opposite spin contributions call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) else hthree_aaa = 0.d0 @@ -246,6 +254,9 @@ END_PROVIDER subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + BEGIN_DOC +! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC use bitmasks ! you need to include the bitmasks_module.f90 features implicit none From 1d5ff0df6629c3374829327df63c912dedd72e00 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 11:52:16 +0200 Subject: [PATCH 122/337] added the possibility to select 3idx, 4-idx and 5idx --- src/tc_bi_ortho/slater_tc_3e.irp.f | 24 +++++++++++++--------- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 4 ++-- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 8 ++++---- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 2 +- src/tc_bi_ortho/tc_hmat.irp.f | 16 ++++----------- src/tc_keywords/EZFIO.cfg | 18 ++++++++++++++++ 6 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 7b73d5f2..f95be64b 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -4,17 +4,21 @@ subroutine provide_all_three_ints_bi_ortho ! routine that provides all necessary three-electron integrals END_DOC if(three_body_h_tc)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif endif -if(.not.double_normal_ord)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort -else - PROVIDE normal_two_body_bi_orth -endif end subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 5a3f9935..1745e362 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 1b0e43bb..2d6bfb27 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -42,13 +42,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! opposite spin two-body htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif @@ -59,13 +59,13 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num.gt.2)then + elseif(double_normal_ord)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 2f9d83bf..7178d6d9 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc.and.elec_num.gt.2)then + if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 3353d3e7..ec072531 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -9,33 +9,25 @@ implicit none integer :: i, j - double precision :: hmono,htwoe,hthree,htot + double precision :: htot PROVIDE N_int i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det do j = 1, N_det ! < J | Htilde | I > - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !print *, ' hmono = ', hmono - !print *, ' htwoe = ', htwoe - !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo !$OMP END PARALLEL DO -! print*,'htilde_matrix_elmt_bi_ortho = ' -! do i = 1, min(100,N_det) -! write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i) -! enddo - END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 3a26a6eb..484bd1f0 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml default: True +[three_e_3_idx_term] +type: logical +doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_4_idx_term] +type: logical +doc: If |true|, the off-diagonal 4-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_5_idx_term] +type: logical +doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + [pure_three_body_h_tc] type: logical doc: If |true|, pure triple excitation three-body terms are included From daf8b1c3dcef5f066d1add8b2cc751f03544ba98 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 18:17:17 +0200 Subject: [PATCH 123/337] renaming the routines in tc slater rules in _slow when they are naively built --- src/tc_bi_ortho/dressing_vectors_lr.irp.f | 8 ++-- src/tc_bi_ortho/e_corr_bi_ortho.irp.f | 18 +++---- src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 12 ++--- src/tc_bi_ortho/print_tc_wf.irp.f | 6 +-- src/tc_bi_ortho/pt2_tc_cisd.irp.f | 8 ++-- ...er_tc_3e.irp.f => slater_tc_3e_slow.irp.f} | 28 ++--------- src/tc_bi_ortho/slater_tc_opt.irp.f | 23 +++++++++ src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 4 +- .../{slater_tc.irp.f => slater_tc_slow.irp.f} | 47 +++++++------------ src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f | 12 ++--- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 5 +- src/tc_bi_ortho/tc_som.irp.f | 4 +- src/tc_bi_ortho/tc_utils.irp.f | 4 +- src/tc_bi_ortho/test_normal_order.irp.f | 6 +-- src/tc_bi_ortho/test_s2_tc.irp.f | 2 +- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 14 +++--- src/tc_bi_ortho/test_tc_fock.irp.f | 7 ++- 17 files changed, 97 insertions(+), 111 deletions(-) rename src/tc_bi_ortho/{slater_tc_3e.irp.f => slater_tc_3e_slow.irp.f} (87%) rename src/tc_bi_ortho/{slater_tc.irp.f => slater_tc_slow.irp.f} (85%) diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index 08913bab..ed663f02 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! < I | H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index 3a715b44..6d5c3b21 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -57,7 +57,7 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) @@ -80,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -99,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index b7129d36..1d1b26cc 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -1,4 +1,4 @@ -subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo @@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) end -subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 @@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 0cf3ca87..0c4198a9 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -49,12 +49,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index 50d9dd45..9cb9a600 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -36,11 +36,11 @@ subroutine routine e_corr_abs = 0.d0 e_corr_pos = 0.d0 e_corr_neg = 0.d0 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) do i = 2, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f similarity index 87% rename from src/tc_bi_ortho/slater_tc_3e.irp.f rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f index f95be64b..6abb6b78 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,27 +1,5 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif - endif -end -subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) +subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -112,7 +90,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) end -subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -207,7 +185,7 @@ end ! --- -subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a19d4688..3fd2576a 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,26 @@ +subroutine provide_all_three_ints_bi_ortho + implicit none + BEGIN_DOC +! routine that provides all necessary three-electron integrals + END_DOC + if(three_body_h_tc)then + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif + endif +end + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 1745e362..531f0141 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,11 +7,11 @@ ! Various component of the TC energy for the reference "HF" Slater determinant END_DOC double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree) + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f similarity index 85% rename from src/tc_bi_ortho/slater_tc.irp.f rename to src/tc_bi_ortho/slater_tc_slow.irp.f index 2c0ae2ca..1833d20f 100644 --- a/src/tc_bi_ortho/slater_tc.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) if(degree.gt.2)then htot = 0.d0 else - call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) endif -end subroutine htilde_mu_mat_bi_ortho_tot +end subroutine htilde_mu_mat_bi_ortho_tot_slow ! -- -subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) +subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! @@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot if(degree.gt.2) return if(degree == 0)then - call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) else if (degree == 1)then - call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2)then - call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord) then - call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) + if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1) then - call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) - else if(degree == 0) then - call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) + else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) + else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif @@ -89,7 +89,7 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) +subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS @@ -188,7 +188,7 @@ end -subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS @@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) return endif -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! else call bitstring_to_list_ab(key_i, occ, Ne, Nint) -! endif call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) @@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) ! opposite spin two-body ! key_j, key_i htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? endif else @@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? endif @@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) end -subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4ae44148..4c3c0788 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -11,10 +11,10 @@ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) dressing_dets = 0.d0 do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 @@ -29,7 +29,7 @@ vec_tmp(istate,istate) = 1.d0 enddo print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) do i = 1, N_det e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo @@ -41,8 +41,8 @@ it = 0 dressing_dets = 0.d0 double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav + external htc_bi_ortho_calc_tdav_slow + external htcdag_bi_ortho_calc_tdav_slow logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 @@ -66,7 +66,7 @@ do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) print*,'outside Davidson' print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index a83d6cd0..db4c5e28 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -207,8 +207,6 @@ end else ! n_det > N_det_max_full double precision, allocatable :: H_jj(:),vec_tmp(:,:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt @@ -217,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' @@ -259,7 +257,6 @@ end do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index 291c52ef..a7e4d09e 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -56,8 +56,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..24bb7017 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -12,7 +12,7 @@ subroutine write_tc_energy() do i = 1, N_det do j = 1, N_det !htot = htilde_matrix_elmt_bi_ortho(i,j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot enddo @@ -45,7 +45,7 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) SIGMA_TC = SIGMA_TC + htot * htot enddo diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index 118e481a..cb0c355c 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -35,7 +35,7 @@ subroutine test det_i = ref_bitmask call do_single_excitation(det_i,h1,p1,s1,i_ok) call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree *= phase @@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4debe2e2..1f7bdfda 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo ! Preparing the left-eigenvector print*,'Computing the left-eigenvector ' diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 6721c285..df86ea65 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -31,7 +31,7 @@ subroutine test_h_u0 u_0(i) = psi_r_coef_bi_ortho(i,1) enddo call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det) + call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det) print*,'difference right ' accu = 0.d0 do i = 1, N_det @@ -42,7 +42,7 @@ subroutine test_h_u0 do_right = .False. v_0_new = 0.d0 call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right) + call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right) print*,'difference left' accu = 0.d0 do i = 1, N_det @@ -63,7 +63,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -99,7 +99,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -146,7 +146,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -183,7 +183,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -225,7 +225,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index ebd43a7a..b7de067f 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -25,8 +25,7 @@ subroutine test_3e implicit none double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) -! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) print*,'hmono = ',hmono print*,'htwoe = ',htwoe print*,'hthree= ',hthree @@ -88,7 +87,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -156,7 +155,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' From fd051ae020927be5e495dc3da3fa661ba55cd6ee Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 22 May 2023 18:39:48 +0200 Subject: [PATCH 124/337] some cleaning in slow and no slow tc routines --- src/cipsi_tc_bi_ortho/selection.irp.f | 5 ++--- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 4c271a4b..77377554 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! and transpose ! ------------------------------------------- -! call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) double precision :: hmono, htwoe, hthree call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states @@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index db4c5e28..fa946d6a 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -215,7 +215,7 @@ end allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo print*,'---------------------------------' From 4d9cdf9df1d8e0d61006c1d348f28e96c0946464 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 24 May 2023 11:06:00 +0200 Subject: [PATCH 125/337] added new mu(r) jastrow --- src/non_h_ints_mu/jast_deriv.irp.f | 167 ++++++++++++++++++++++++++++- src/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 172 insertions(+), 1 deletion(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index cbd0b406..5e99600e 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -187,6 +187,19 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) + BEGIN_DOC +! gradient of j(mu(r1,r2),r12) form of jastrow. +! +! if mu(r1,r2) = cst ---> j1b_type < 200 and +! +! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) +! +! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and +! +! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) +! +! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + END_DOC include 'constants.include.F' implicit none @@ -515,6 +528,9 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) double precision :: r(3) double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume if(j1b_type .eq. 200) then @@ -578,8 +594,84 @@ subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - else + elseif(j1b_type .eq. 202) then + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' stop @@ -684,3 +776,76 @@ end function j12_mu_square ! --- +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 484bd1f0..62adb068 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -148,6 +148,12 @@ doc: a parameter used to define mu(r) interface: ezfio, provider, ocaml default: 6.203504908994001e-1 +[beta_rho_power] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 0.5 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue From 7e5f1ffc0c8fb9edc23e33f5be163b3d93ff124f Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 25 May 2023 12:57:58 +0200 Subject: [PATCH 126/337] added plot_mu_of_r.irp.f --- src/non_h_ints_mu/plot_mu_of_r.irp.f | 33 ++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/non_h_ints_mu/plot_mu_of_r.irp.f diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f new file mode 100644 index 00000000..1100cd7c --- /dev/null +++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f @@ -0,0 +1,33 @@ +program plot_mu_of_r + implicit none + read_wf = .False. + touch read_wf + call routine_print + +end + + +subroutine routine_print + implicit none + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_of_r' + i_unit_output = getUnitAndOpen(output,'w') + integer :: ipoint,nx + double precision :: xmax,xmin,r(3),dx + double precision :: mu_val, mu_der(3),dm_a,dm_b,grad + xmax = 5.D0 + xmin = -5.D0 + nx = 10000 + dx = (xmax - xmin)/dble(nx) + r = 0.d0 + r(1) = xmin + do ipoint = 1, nx + call mu_r_val_and_grad(r, r, mu_val, mu_der) + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) + grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 + grad = dsqrt(grad) + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad + r(1) += dx + enddo +end From 86b48454127d011a0d361dd651a6ef6ef2b798d5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 26 May 2023 08:10:18 +0200 Subject: [PATCH 127/337] IO TC normal ordering added --- src/tc_bi_ortho/normal_ordered.irp.f | 147 +++++++++++++++------------ src/tc_keywords/EZFIO.cfg | 6 ++ 2 files changed, 90 insertions(+), 63 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 8adc7a63..f9728d05 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -15,79 +18,97 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer(bit_kind), allocatable :: key_i_core(:,:) double precision :: hthree_aba,hthree_aaa,hthree_aab double precision :: wall0,wall1 - - PROVIDE N_int - allocate( occ(N_int*bit_kind_size,2) ) - allocate( key_i_core(N_int,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth = 0.d0 - print*,'Providing normal_two_body_bi_orth ...' + print*,' Providing normal_two_body_bi_orth ...' call wall_time(wall0) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - + if(read_tc_norm_ord) then - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") + read(11) normal_two_body_bi_orth + close(11) - ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo enddo enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif call wall_time(wall1) - print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0 - - deallocate( occ ) - deallocate( key_i_core ) + print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 3a26a6eb..de638da9 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -202,6 +202,12 @@ doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_l interface: ezfio,provider,ocaml default: None +[io_tc_norm_ord] +type: Disk_access +doc: Read/Write normal_two_body_bi_orth from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [debug_tc_pt2] type: integer doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body From f2ca86ef604b906c3c10032691ac5e0e0ff83b53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 26 May 2023 11:48:08 +0200 Subject: [PATCH 128/337] Improved cholesky --- src/ao_two_e_ints/cholesky.irp.f | 25 +++++++++++++++++++++++-- src/cipsi/selection.irp.f | 3 +++ src/mo_two_e_ints/cholesky.irp.f | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d4c201aa..5aab12d9 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -51,8 +51,9 @@ END_PROVIDER double precision :: integral logical, external :: ao_two_e_integral_zero - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) SCHEDULE(dynamic) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) do l=1,ao_num + !$OMP DO SCHEDULE(dynamic) do j=1,l do k=1,ao_num do i=1,k @@ -65,8 +66,28 @@ END_PROVIDER enddo enddo enddo + !$OMP END DO NOWAIT enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral) + do l=1,ao_num + !$OMP DO SCHEDULE(dynamic) + do j=1,l + do k=1,ao_num + do i=1,k + if (ao_two_e_integral_zero(i,j,k,l)) cycle + integral = ao_two_e_integral(i,k,j,l) + ao_integrals(i,k,j,l) = integral + ao_integrals(k,i,j,l) = integral + ao_integrals(i,k,l,j) = integral + ao_integrals(k,i,l,j) = integral + enddo + enddo + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 6f40a809..0705d103 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -76,6 +76,8 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) double precision, allocatable :: fock_diag_tmp(:,:) + if (csubset == 0) return + allocate(fock_diag_tmp(2,mo_num+1)) call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int) @@ -177,6 +179,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d monoAdo = .true. monoBdo = .true. + if (csubset == 0) return do k=1,N_int hole (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1)) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 14d3c696..1706b2ec 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -6,6 +6,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num integer :: k + call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) From b8bfab1d7cd8576c9597d92f70822d903628a6a6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 27 May 2023 22:34:40 +0200 Subject: [PATCH 129/337] start working on NO --- src/tc_bi_ortho/normal_ordered.irp.f | 252 ++++++++++++++++----------- 1 file changed, 150 insertions(+), 102 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 8adc7a63..c30cd1ef 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,3 +1,6 @@ + +! --- + BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -8,13 +11,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aba,hthree_aaa,hthree_aab - double precision :: wall0,wall1 + + print*,' Providing normal_two_body_bi_orth ...' + call wall_time(wall0) PROVIDE N_int @@ -23,23 +29,21 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 - print*,'Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + normal_two_body_bi_orth(1:mo_num,1:mo_num,1:mo_num,1:mo_num) = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) - !$OMP DO SCHEDULE (static) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) do pp1 = 1, n_act_orb @@ -48,50 +52,57 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ h2 = list_act(hh2) do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - ! opposite spin double excitations : s1 /= s2 call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 .lt. h2) .and. (p1 .gt. p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0 * (hthree_aba + hthree_aab + hthree_aaa) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0 + !$OMP END DO + !$OMP END PARALLEL deallocate( occ ) deallocate( key_i_core ) + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 + END_PROVIDER - +! --- subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) @@ -106,30 +117,41 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) !!!! double alpha/beta hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 2.d0 * int_direct - 1.d0 * ( int_exc_13 + int_exc_12) + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) enddo + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13 = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2, i,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0* ( int_exc_13 + int_exc_12) + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) enddo -end subroutine give_aba_contraction - + return +end +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] @@ -152,29 +174,31 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, allocate( key_i_core(N_int,2) ) allocate( occ(N_int*bit_kind_size,2) ) - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) endif + normal_two_body_bi_orth_ab = 0.d0 do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo enddo - enddo enddo deallocate( key_i_core ) @@ -182,7 +206,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, END_PROVIDER - +! --- BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] @@ -250,13 +274,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_ END_PROVIDER - +! --- subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC -! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 END_DOC + use bitmasks ! you need to include the bitmasks_module.f90 features implicit none @@ -270,48 +295,64 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 ) + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) enddo + do ii = Ne(2)+1,Ne(1) ! purely open-shell part i = occ(ii,1) - call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral) - int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral) - int_exc_l = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral) - int_exc_ll= -1.d0 * integral - call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral) - int_exc_12= -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral) - int_exc_13= -1.d0 * integral - call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral) - int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23 )) + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo -end subroutine give_aaa_contraction - + return +end +! --- subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) double precision, intent(out) :: hthree integer :: ii, i double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 @@ -320,11 +361,18 @@ subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) hthree = 0.d0 do ii = 1, Ne(2) ! purely closed shell part i = occ(ii,2) - call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) int_direct = -1.d0 * integral - call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral) + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) int_exc_23= -1.d0 * integral - hthree += 1.d0 * int_direct - int_exc_23 + + hthree += 1.d0 * int_direct - int_exc_23 enddo -end subroutine give_aab_contraction + return +end + +! --- + From f0ad63966adf94b1bbe794186a0f35d07c744013 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 30 May 2023 13:48:34 +0200 Subject: [PATCH 130/337] Fixes for numerical orbitals in qp_import --- ocaml/Input_ao_basis.ml | 47 ++++---- ocaml/Input_mo_basis.ml | 5 +- scripts/qp_import_trexio.py | 146 +++++++++++++++-------- src/trexio/import_trexio_integrals.irp.f | 136 ++++++++++++++++----- 4 files changed, 236 insertions(+), 98 deletions(-) diff --git a/ocaml/Input_ao_basis.ml b/ocaml/Input_ao_basis.ml index 841089ea..506cf069 100644 --- a/ocaml/Input_ao_basis.ml +++ b/ocaml/Input_ao_basis.ml @@ -44,8 +44,12 @@ end = struct let get_default = Qpackage.get_ezfio_default "ao_basis";; let read_ao_basis () = - Ezfio.get_ao_basis_ao_basis () - |> AO_basis_name.of_string + let result = + Ezfio.get_ao_basis_ao_basis () + in + if result <> "None" then + AO_basis_name.of_string result + else failwith "No basis" ;; let read_ao_num () = @@ -192,7 +196,7 @@ end = struct ao_expo ; ao_cartesian ; ao_normalized ; - primitives_normalized ; + primitives_normalized ; } = b in write_md5 b ; @@ -207,7 +211,7 @@ end = struct Ezfio.set_ao_basis_ao_prim_num (Ezfio.ezfio_array_of_list ~rank:1 ~dim:[| ao_num |] ~data:ao_prim_num) ; - let ao_nucl = + let ao_nucl = Array.to_list ao_nucl |> list_map Nucl_number.to_int in @@ -215,7 +219,7 @@ end = struct ~rank:1 ~dim:[| ao_num |] ~data:ao_nucl) ; let ao_power = - let l = Array.to_list ao_power in + let l = Array.to_list ao_power in List.concat [ (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.x) l) ; (list_map (fun a -> Positive_int.to_int a.Angmom.Xyz.y) l) ; @@ -227,7 +231,7 @@ end = struct Ezfio.set_ao_basis_ao_cartesian(ao_cartesian); Ezfio.set_ao_basis_ao_normalized(ao_normalized); Ezfio.set_ao_basis_primitives_normalized(primitives_normalized); - + let ao_coef = Array.to_list ao_coef |> list_map AO_coef.to_float @@ -267,7 +271,10 @@ end = struct |> Ezfio.set_ao_basis_ao_md5 ; Some result with - | _ -> (Ezfio.set_ao_basis_ao_md5 "None" ; None) + | _ -> ( "None" + |> Digest.string + |> Digest.to_hex + |> Ezfio.set_ao_basis_ao_md5 ; None) ;; @@ -276,7 +283,7 @@ end = struct to_basis b |> Long_basis.of_basis |> Array.of_list - and unordered_basis = + and unordered_basis = to_long_basis b |> Array.of_list in @@ -289,15 +296,15 @@ end = struct (a.(i) <- None ; i) else find x a (i+1) - and find2 (s,g,n) a i = + and find2 (s,g,n) a i = if i = Array.length a then -1 else - match a.(i) with + match a.(i) with | None -> find2 (s,g,n) a (i+1) | Some (s', g', n') -> if s <> s' || n <> n' then find2 (s,g,n) a (i+1) else - let lc = list_map (fun (prim, _) -> prim) g.Gto.lc + let lc = list_map (fun (prim, _) -> prim) g.Gto.lc and lc' = list_map (fun (prim, _) -> prim) g'.Gto.lc in if lc <> lc' then find2 (s,g,n) a (i+1) else (a.(i) <- None ; i) @@ -313,13 +320,13 @@ end = struct let ao_num = List.length long_basis |> AO_number.of_int in let ao_prim_num = list_map (fun (_,g,_) -> List.length g.Gto.lc - |> AO_prim_number.of_int ) long_basis + |> AO_prim_number.of_int ) long_basis |> Array.of_list and ao_nucl = - list_map (fun (_,_,n) -> n) long_basis + list_map (fun (_,_,n) -> n) long_basis |> Array.of_list and ao_power = - list_map (fun (x,_,_) -> x) long_basis + list_map (fun (x,_,_) -> x) long_basis |> Array.of_list in let ao_prim_num_max = Array.fold_left (fun s x -> @@ -329,16 +336,16 @@ end = struct in let gtos = - list_map (fun (_,x,_) -> x) long_basis + list_map (fun (_,x,_) -> x) long_basis in let create_expo_coef ec = let coefs = begin match ec with | `Coefs -> list_map (fun x-> - list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos + list_map (fun (_,coef) -> AO_coef.to_float coef) x.Gto.lc ) gtos | `Expos -> list_map (fun x-> list_map (fun (prim,_) -> AO_expo.to_float - prim.GaussianPrimitive.expo) x.Gto.lc ) gtos + prim.GaussianPrimitive.expo) x.Gto.lc ) gtos end in let rec get_n n accu = function @@ -360,7 +367,7 @@ end = struct let ao_coef = create_expo_coef `Coefs |> Array.of_list |> Array.map AO_coef.of_float - and ao_expo = create_expo_coef `Expos + and ao_expo = create_expo_coef `Expos |> Array.of_list |> Array.map AO_expo.of_float in @@ -372,7 +379,7 @@ end = struct } ;; - let reorder b = + let reorder b = let order = ordering b in let f a = Array.init (Array.length a) (fun i -> a.(order.(i))) in let ao_prim_num_max = AO_prim_number.to_int b.ao_prim_num_max @@ -464,7 +471,7 @@ Basis set (read-only) :: | line :: tail -> let line = String.trim line in if line = "Basis set (read-only) ::" then - String.concat "\n" tail + String.concat "\n" tail else extract_basis tail in diff --git a/ocaml/Input_mo_basis.ml b/ocaml/Input_mo_basis.ml index a4e6176a..832b464e 100644 --- a/ocaml/Input_mo_basis.ml +++ b/ocaml/Input_mo_basis.ml @@ -56,7 +56,10 @@ end = struct let read_ao_md5 () = let ao_md5 = match (Input_ao_basis.Ao_basis.read ()) with - | None -> failwith "Unable to read AO basis" + | None -> ("None" + |> Digest.string + |> Digest.to_hex + |> MD5.of_string) | Some result -> Input_ao_basis.Ao_basis.to_md5 result in let result = diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index eb19e16b..e7bc0f78 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -132,60 +132,113 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() not in ["gaussian", "slater"]: - raise TypeError + if basis_type.lower() in ["gaussian", "slater"]: + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = trexio.read_basis_prim_num(trexio_file) + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = trexio.read_basis_exponent(trexio_file) + coefficient = trexio.read_basis_coefficient(trexio_file) + shell_index = trexio.read_basis_shell_index(trexio_file) + ao_shell = trexio.read_ao_shell(trexio_file) - shell_num = trexio.read_basis_shell_num(trexio_file) - prim_num = trexio.read_basis_prim_num(trexio_file) - ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) - nucl_index = trexio.read_basis_nucleus_index(trexio_file) - exponent = trexio.read_basis_exponent(trexio_file) - coefficient = trexio.read_basis_coefficient(trexio_file) - shell_index = trexio.read_basis_shell_index(trexio_file) - ao_shell = trexio.read_ao_shell(trexio_file) + ezfio.set_basis_basis("Read from TREXIO") + ezfio.set_ao_basis_ao_basis("Read from TREXIO") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) - ezfio.set_basis_basis("Read from TREXIO") - ezfio.set_basis_shell_num(shell_num) - ezfio.set_basis_prim_num(prim_num) - ezfio.set_basis_shell_ang_mom(ang_mom) - ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) - ezfio.set_basis_prim_expo(exponent) - ezfio.set_basis_prim_coef(coefficient) + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) - nucl_shell_num = [] - prev = None - m = 0 - for i in ao_shell: - if i != prev: - m += 1 - if prev is None or nucl_index[i] != nucl_index[prev]: - nucl_shell_num.append(m) - m = 0 - prev = i - assert (len(nucl_shell_num) == nucl_num) + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) - shell_prim_num = [] - prev = shell_index[0] - count = 0 - for i in shell_index: - if i != prev: - shell_prim_num.append(count) - count = 0 - count += 1 - prev = i - shell_prim_num.append(count) + assert (len(shell_prim_num) == shell_num) - assert (len(shell_prim_num) == shell_num) - - ezfio.set_basis_shell_prim_num(shell_prim_num) - ezfio.set_basis_shell_index([x+1 for x in shell_index]) - ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) - shell_factor = trexio.read_basis_shell_factor(trexio_file) - prim_factor = trexio.read_basis_prim_factor(trexio_file) + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = trexio.read_basis_prim_factor(trexio_file) - print("OK") + elif basis_type.lower() == "numerical": + + shell_num = trexio.read_basis_shell_num(trexio_file) + prim_num = shell_num + ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) + nucl_index = trexio.read_basis_nucleus_index(trexio_file) + exponent = [1.]*prim_num + coefficient = [1.]*prim_num + shell_index = [i for i in range(shell_num)] + ao_shell = trexio.read_ao_shell(trexio_file) + + ezfio.set_basis_basis("None") + ezfio.set_ao_basis_ao_basis("None") + ezfio.set_basis_shell_num(shell_num) + ezfio.set_basis_prim_num(prim_num) + ezfio.set_basis_shell_ang_mom(ang_mom) + ezfio.set_basis_basis_nucleus_index([ x+1 for x in nucl_index ]) + ezfio.set_basis_prim_expo(exponent) + ezfio.set_basis_prim_coef(coefficient) + + nucl_shell_num = [] + prev = None + m = 0 + for i in ao_shell: + if i != prev: + m += 1 + if prev is None or nucl_index[i] != nucl_index[prev]: + nucl_shell_num.append(m) + m = 0 + prev = i + assert (len(nucl_shell_num) == nucl_num) + + shell_prim_num = [] + prev = shell_index[0] + count = 0 + for i in shell_index: + if i != prev: + shell_prim_num.append(count) + count = 0 + count += 1 + prev = i + shell_prim_num.append(count) + + assert (len(shell_prim_num) == shell_num) + + ezfio.set_basis_shell_prim_num(shell_prim_num) + ezfio.set_basis_shell_index([x+1 for x in shell_index]) + ezfio.set_basis_nucleus_shell_num(nucl_shell_num) + + shell_factor = trexio.read_basis_shell_factor(trexio_file) + prim_factor = [1.]*prim_num + else: + raise TypeError + + print(basis_type) except: print("None") ezfio.set_ao_basis_ao_cartesian(True) @@ -262,7 +315,6 @@ def write_ezfio(trexio_filename, filename): # ezfio.set_ao_basis_ao_prim_num_max(prim_num_max) ezfio.set_ao_basis_ao_coef(coef) ezfio.set_ao_basis_ao_expo(expo) - ezfio.set_ao_basis_ao_basis("Read from TREXIO") print("OK") diff --git a/src/trexio/import_trexio_integrals.irp.f b/src/trexio/import_trexio_integrals.irp.f index 9f9ad9d6..8c6b79d7 100644 --- a/src/trexio/import_trexio_integrals.irp.f +++ b/src/trexio/import_trexio_integrals.irp.f @@ -3,6 +3,7 @@ program import_integrals_ao implicit none integer(trexio_t) :: f ! TREXIO file handle integer(trexio_exit_code) :: rc + PROVIDE mo_num f = trexio_open(trexio_filename, 'r', TREXIO_AUTO, rc) if (f == 0_8) then @@ -42,10 +43,10 @@ subroutine run(f) if (trexio_has_nucleus_repulsion(f) == TREXIO_SUCCESS) then rc = trexio_read_nucleus_repulsion(f, s) - call trexio_assert(rc, TREXIO_SUCCESS) if (rc /= TREXIO_SUCCESS) then print *, irp_here, rc print *, 'Error reading nuclear repulsion' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_nuclei_nuclear_repulsion(s) @@ -63,6 +64,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO overlap' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_overlap(A) @@ -74,6 +76,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO kinetic integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_kinetic(A) @@ -85,6 +88,7 @@ subroutine run(f) ! if (rc /= TREXIO_SUCCESS) then ! print *, irp_here ! print *, 'Error reading AO ECP local integrals' +! call trexio_assert(rc, TREXIO_SUCCESS) ! stop -1 ! endif ! call ezfio_set_ao_one_e_ints_ao_integrals_pseudo(A) @@ -96,6 +100,7 @@ subroutine run(f) if (rc /= TREXIO_SUCCESS) then print *, irp_here print *, 'Error reading AO potential N-e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) stop -1 endif call ezfio_set_ao_one_e_ints_ao_integrals_n_e(A) @@ -106,41 +111,112 @@ subroutine run(f) ! AO 2e integrals ! --------------- - PROVIDE ao_integrals_map - integer*4 :: BUFSIZE - BUFSIZE=ao_num**2 - allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) - allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + rc = trexio_has_ao_2e_int(f) + PROVIDE ao_num + if (rc /= TREXIO_HAS_NOT) then + PROVIDE ao_integrals_map - integer*8 :: offset, icount + integer*4 :: BUFSIZE + BUFSIZE=ao_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) - offset = 0_8 - icount = BUFSIZE - rc = TREXIO_SUCCESS - do while (icount == size(V)) - rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) - do m=1,icount - i = Vi(1,m) - j = Vi(2,m) - k = Vi(3,m) - l = Vi(4,m) - integral = V(m) - call two_e_integrals_index(i, j, k, l, buffer_i(m) ) - buffer_values(m) = integral - enddo - call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) - offset = offset + icount + integer*8 :: offset, icount + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_ao_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call insert_into_ao_integrals_map(int(icount,4),buffer_i,buffer_values) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(ao_integrals_map) + call map_unique(ao_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'AO integrals read from TREXIO file' + else + print *, 'AO integrals not found in TREXIO file' + endif + + ! MO integrals + ! ------------ + + allocate(A(mo_num, mo_num)) + if (trexio_has_mo_1e_int_core_hamiltonian(f) == TREXIO_SUCCESS) then + rc = trexio_read_mo_1e_int_core_hamiltonian(f, A) if (rc /= TREXIO_SUCCESS) then - exit + print *, irp_here + print *, 'Error reading MO 1e integrals' + call trexio_assert(rc, TREXIO_SUCCESS) + stop -1 endif - end do - n_integrals = offset + call ezfio_set_mo_one_e_ints_mo_one_e_integrals(A) + call ezfio_set_mo_one_e_ints_io_mo_one_e_integrals('Read') + endif + deallocate(A) - call map_sort(ao_integrals_map) - call map_unique(ao_integrals_map) + ! MO 2e integrals + ! --------------- - call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints',ao_integrals_map) - call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals('Read') + rc = trexio_has_mo_2e_int(f) + if (rc /= TREXIO_HAS_NOT) then + + BUFSIZE=mo_num**2 + allocate(buffer_i(BUFSIZE), buffer_values(BUFSIZE)) + allocate(Vi(4,BUFSIZE), V(BUFSIZE)) + + + offset = 0_8 + icount = BUFSIZE + rc = TREXIO_SUCCESS + do while (icount == size(V)) + rc = trexio_read_mo_2e_int_eri(f, offset, icount, Vi, V) + do m=1,icount + i = Vi(1,m) + j = Vi(2,m) + k = Vi(3,m) + l = Vi(4,m) + integral = V(m) + call two_e_integrals_index(i, j, k, l, buffer_i(m) ) + buffer_values(m) = integral + enddo + call map_append(mo_integrals_map, buffer_i, buffer_values, int(icount,4)) + offset = offset + icount + if (rc /= TREXIO_SUCCESS) then + exit + endif + end do + n_integrals = offset + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) + + call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals('Read') + deallocate(buffer_i, buffer_values, Vi, V) + print *, 'MO integrals read from TREXIO file' + else + print *, 'MO integrals not found in TREXIO file' + endif end From ff5d62f840ee0c685120df72724d6ba049a07037 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 31 May 2023 11:01:51 +0200 Subject: [PATCH 131/337] Fix normalization factor in trexio --- src/trexio/EZFIO.cfg | 30 ++-- src/trexio/export_trexio.irp.f | 2 +- src/trexio/export_trexio_routines.irp.f | 176 +++++++++++++----------- 3 files changed, 119 insertions(+), 89 deletions(-) diff --git a/src/trexio/EZFIO.cfg b/src/trexio/EZFIO.cfg index 8606e908..8c11478e 100644 --- a/src/trexio/EZFIO.cfg +++ b/src/trexio/EZFIO.cfg @@ -10,11 +10,17 @@ doc: Name of the exported TREXIO file interface: ezfio, ocaml, provider default: None -[export_rdm] +[export_basis] type: logical -doc: If True, export two-body reduced density matrix +doc: If True, export basis set and AOs interface: ezfio, ocaml, provider -default: False +default: True + +[export_mos] +type: logical +doc: If True, export basis set and AOs +interface: ezfio, ocaml, provider +default: True [export_ao_one_e_ints] type: logical @@ -22,12 +28,6 @@ doc: If True, export one-electron integrals in AO basis interface: ezfio, ocaml, provider default: False -[export_mo_one_e_ints] -type: logical -doc: If True, export one-electron integrals in MO basis -interface: ezfio, ocaml, provider -default: False - [export_ao_two_e_ints] type: logical doc: If True, export two-electron integrals in AO basis @@ -40,6 +40,12 @@ doc: If True, export Cholesky-decomposed two-electron integrals in AO basis interface: ezfio, ocaml, provider default: False +[export_mo_one_e_ints] +type: logical +doc: If True, export one-electron integrals in MO basis +interface: ezfio, ocaml, provider +default: False + [export_mo_two_e_ints] type: logical doc: If True, export two-electron integrals in MO basis @@ -52,3 +58,9 @@ doc: If True, export Cholesky-decomposed two-electron integrals in MO basis interface: ezfio, ocaml, provider default: False +[export_rdm] +type: logical +doc: If True, export two-body reduced density matrix +interface: ezfio, ocaml, provider +default: False + diff --git a/src/trexio/export_trexio.irp.f b/src/trexio/export_trexio.irp.f index 3ae0dcb4..f9ecc17f 100644 --- a/src/trexio/export_trexio.irp.f +++ b/src/trexio/export_trexio.irp.f @@ -2,6 +2,6 @@ program export_trexio_prog implicit none read_wf = .True. SOFT_TOUCH read_wf - call export_trexio + call export_trexio(.False.) end diff --git a/src/trexio/export_trexio_routines.irp.f b/src/trexio/export_trexio_routines.irp.f index c55ddc5e..f25ae370 100644 --- a/src/trexio/export_trexio_routines.irp.f +++ b/src/trexio/export_trexio_routines.irp.f @@ -1,15 +1,17 @@ -subroutine export_trexio +subroutine export_trexio(update) use trexio implicit none BEGIN_DOC ! Exports the wave function in TREXIO format END_DOC + logical, intent(in) :: update integer(trexio_t) :: f(N_states) ! TREXIO file handle integer(trexio_exit_code) :: rc integer :: k double precision, allocatable :: factor(:) character*(256) :: filenames(N_states) + character :: rw filenames(1) = trexio_filename do k=2,N_states @@ -18,15 +20,26 @@ subroutine export_trexio do k=1,N_states print *, 'TREXIO file : ', trim(filenames(k)) - call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + if (update) then + call system('test -f '//trim(filenames(k))//' && cp -r '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + else + call system('test -f '//trim(filenames(k))//' && mv '//trim(filenames(k))//' '//trim(filenames(k))//'.bak') + endif enddo print *, '' + if (update) then + rw = 'u' + else + rw = 'w' + endif + + do k=1,N_states if (backend == 0) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_HDF5, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_HDF5, rc) else if (backend == 1) then - f(k) = trexio_open(filenames(k), 'u', TREXIO_TEXT, rc) + f(k) = trexio_open(filenames(k), rw, TREXIO_TEXT, rc) endif if (f(k) == 0_8) then print *, 'Unable to open TREXIO file for writing' @@ -171,92 +184,95 @@ subroutine export_trexio endif + if (export_basis) then + ! Basis ! ----- - print *, 'Basis' + print *, 'Basis' + rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_type(f(1), 'Gaussian', len('Gaussian')) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_prim_num(f(1), prim_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_prim_num(f(1), prim_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_num(f(1), shell_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_num(f(1), shell_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_nucleus_index(f(1), basis_nucleus_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_ang_mom(f(1), shell_ang_mom) - call trexio_assert(rc, TREXIO_SUCCESS) + allocate(factor(shell_num)) +! if (ao_normalized) then +! factor(1:shell_num) = shell_normalization_factor(1:shell_num) +! else + factor(1:shell_num) = 1.d0 +! endif + rc = trexio_write_basis_shell_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) - allocate(factor(shell_num)) - if (ao_normalized) then - factor(1:shell_num) = shell_normalization_factor(1:shell_num) - else - factor(1:shell_num) = 1.d0 - endif - rc = trexio_write_basis_shell_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - deallocate(factor) + rc = trexio_write_basis_shell_index(f(1), shell_index) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_shell_index(f(1), shell_index) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_exponent(f(1), prim_expo) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_exponent(f(1), prim_expo) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_basis_coefficient(f(1), prim_coef) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_basis_coefficient(f(1), prim_coef) - call trexio_assert(rc, TREXIO_SUCCESS) - - allocate(factor(prim_num)) - if (primitives_normalized) then - factor(1:prim_num) = prim_normalization_factor(1:prim_num) - else - factor(1:prim_num) = 1.d0 - endif - rc = trexio_write_basis_prim_factor(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) + allocate(factor(prim_num)) + if (primitives_normalized) then + factor(1:prim_num) = prim_normalization_factor(1:prim_num) + else + factor(1:prim_num) = 1.d0 + endif + rc = trexio_write_basis_prim_factor(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) ! Atomic orbitals ! --------------- - print *, 'AOs' + print *, 'AOs' - rc = trexio_write_ao_num(f(1), ao_num) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_num(f(1), ao_num) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_cartesian(f(1), 1) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_cartesian(f(1), 1) + call trexio_assert(rc, TREXIO_SUCCESS) - rc = trexio_write_ao_shell(f(1), ao_shell) - call trexio_assert(rc, TREXIO_SUCCESS) + rc = trexio_write_ao_shell(f(1), ao_shell) + call trexio_assert(rc, TREXIO_SUCCESS) - integer :: i, pow0(3), powA(3), j, l, nz - double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c - nz=100 + integer :: i, pow0(3), powA(3), j, l, nz + double precision :: normA, norm0, C_A(3), overlap_x, overlap_z, overlap_y, c + nz=100 - C_A(1) = 0.d0 - C_A(2) = 0.d0 - C_A(3) = 0.d0 + C_A(1) = 0.d0 + C_A(2) = 0.d0 + C_A(3) = 0.d0 + + allocate(factor(ao_num)) + if (ao_normalized) then + do i=1,ao_num + l = ao_first_of_shell(ao_shell(i)) + factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) + enddo + else + factor(:) = 1.d0 + endif + rc = trexio_write_ao_normalization(f(1), factor) + call trexio_assert(rc, TREXIO_SUCCESS) + deallocate(factor) - allocate(factor(ao_num)) - if (ao_normalized) then - do i=1,ao_num - l = ao_first_of_shell(ao_shell(i)) - factor(i) = (ao_coef_normalized(i,1)+tiny(1.d0))/(ao_coef_normalized(l,1)+tiny(1.d0)) - enddo - else - factor(:) = 1.d0 endif - rc = trexio_write_ao_normalization(f(1), factor) - call trexio_assert(rc, TREXIO_SUCCESS) - deallocate(factor) ! One-e AO integrals ! ------------------ @@ -375,28 +391,30 @@ subroutine export_trexio ! Molecular orbitals ! ------------------ - print *, 'MOs' + if (export_mos) then + print *, 'MOs' - rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) - call trexio_assert(rc, TREXIO_SUCCESS) - - do k=1,N_states - rc = trexio_write_mo_num(f(k), mo_num) + rc = trexio_write_mo_type(f(1), mo_label, len(trim(mo_label))) call trexio_assert(rc, TREXIO_SUCCESS) - enddo - rc = trexio_write_mo_coefficient(f(1), mo_coef) - call trexio_assert(rc, TREXIO_SUCCESS) + do k=1,N_states + rc = trexio_write_mo_num(f(k), mo_num) + call trexio_assert(rc, TREXIO_SUCCESS) + enddo - if ( (trim(mo_label) == 'Canonical').and. & - (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then - rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + rc = trexio_write_mo_coefficient(f(1), mo_coef) + call trexio_assert(rc, TREXIO_SUCCESS) + + if ( (trim(mo_label) == 'Canonical').and. & + (export_mo_two_e_ints_cholesky.or.export_mo_two_e_ints) ) then + rc = trexio_write_mo_energy(f(1), fock_matrix_diag_mo) + call trexio_assert(rc, TREXIO_SUCCESS) + endif + + rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) call trexio_assert(rc, TREXIO_SUCCESS) endif - rc = trexio_write_mo_class(f(1), mo_class, len(mo_class(1))) - call trexio_assert(rc, TREXIO_SUCCESS) - ! One-e MO integrals ! ------------------ From 3306d26e0e0f08cd407df73de4f44388ebc6a919 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 31 May 2023 11:47:53 +0200 Subject: [PATCH 132/337] Fix import_trexio --- scripts/qp_import_trexio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index e7bc0f78..89096387 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -340,6 +340,7 @@ def write_ezfio(trexio_filename, filename): except: label = "None" ezfio.set_mo_basis_mo_label(label) + ezfio.set_determinants_mo_label(label) try: clss = trexio.read_mo_class(trexio_file) From 87090d73978169b167e103e3fd867682d9f5b32f Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 31 May 2023 18:11:54 +0200 Subject: [PATCH 133/337] fixed nuclear repulsion in fci_tc_bi_ortho --- src/fci_tc_bi/diagonalize_ci.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index b6ec073f..df753449 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,7 +39,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif - psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) E_tc = eigval_right_tc_bi_orth(1) From 00be08932321f73b425987f99e4e5fcc685425f9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 09:56:06 +0200 Subject: [PATCH 134/337] Removed duplicate provider in cosgto --- src/cosgtos_ao_int/EZFIO.cfg | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg index 8edeecd0..fe57b652 100644 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -10,10 +10,3 @@ doc: If true, use cosgtos for AO integrals interface: ezfio,provider,ocaml default: False -[ao_integrals_threshold] -type: Threshold -doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero -interface: ezfio,provider,ocaml -default: 1.e-15 -ezfio_name: threshold_ao - From 77186e0560d7febb0cf0f0ce7c6c03f98e1b9d9c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 11:11:29 +0200 Subject: [PATCH 135/337] Cleaned cosgtos --- src/ao_basis/EZFIO.cfg | 11 +++++++++++ src/ao_one_e_ints/NEED | 1 - .../aos_cosgtos.irp.f | 0 .../one_e_Coul_integrals_cosgtos.irp.f} | 0 .../one_e_kin_integrals_cosgtos.irp.f} | 0 .../gauss_legendre.irp.f | 0 .../two_e_Coul_integrals_cosgtos.irp.f} | 0 src/cosgtos_ao_int/EZFIO.cfg | 12 ------------ src/cosgtos_ao_int/NEED | 2 -- src/cosgtos_ao_int/README.rst | 4 ---- src/cosgtos_ao_int/cosgtos_ao_int.irp.f | 7 ------- 11 files changed, 11 insertions(+), 26 deletions(-) rename src/{cosgtos_ao_int => ao_one_e_ints}/aos_cosgtos.irp.f (100%) rename src/{cosgtos_ao_int/one_e_Coul_integrals.irp.f => ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f} (100%) rename src/{cosgtos_ao_int/one_e_kin_integrals.irp.f => ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f} (100%) rename src/{cosgtos_ao_int => ao_two_e_ints}/gauss_legendre.irp.f (100%) rename src/{cosgtos_ao_int/two_e_Coul_integrals.irp.f => ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f} (100%) delete mode 100644 src/cosgtos_ao_int/EZFIO.cfg delete mode 100644 src/cosgtos_ao_int/NEED delete mode 100644 src/cosgtos_ao_int/README.rst delete mode 100644 src/cosgtos_ao_int/cosgtos_ao_int.irp.f diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index 51d726da..a203e3f0 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -67,3 +67,14 @@ doc: Use normalized primitive functions interface: ezfio, provider default: true +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider +default: False diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index b9caaf5d..61d23b1e 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,3 +1,2 @@ ao_basis pseudo -cosgtos_ao_int diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/ao_one_e_ints/aos_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/aos_cosgtos.irp.f rename to src/ao_one_e_ints/aos_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_Coul_integrals.irp.f rename to src/ao_one_e_ints/one_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/one_e_kin_integrals.irp.f rename to src/ao_one_e_ints/one_e_kin_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/ao_two_e_ints/gauss_legendre.irp.f similarity index 100% rename from src/cosgtos_ao_int/gauss_legendre.irp.f rename to src/ao_two_e_ints/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f similarity index 100% rename from src/cosgtos_ao_int/two_e_Coul_integrals.irp.f rename to src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg deleted file mode 100644 index fe57b652..00000000 --- a/src/cosgtos_ao_int/EZFIO.cfg +++ /dev/null @@ -1,12 +0,0 @@ -[ao_expoim_cosgtos] -type: double precision -doc: imag part for Exponents for each primitive of each cosGTOs |AO| -size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) -interface: ezfio, provider - -[use_cosgtos] -type: logical -doc: If true, use cosgtos for AO integrals -interface: ezfio,provider,ocaml -default: False - diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED deleted file mode 100644 index 932f88a3..00000000 --- a/src/cosgtos_ao_int/NEED +++ /dev/null @@ -1,2 +0,0 @@ -ezfio_files -ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst deleted file mode 100644 index 01f25d6d..00000000 --- a/src/cosgtos_ao_int/README.rst +++ /dev/null @@ -1,4 +0,0 @@ -============== -cosgtos_ao_int -============== - diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f deleted file mode 100644 index d65dfba5..00000000 --- a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f +++ /dev/null @@ -1,7 +0,0 @@ -program cosgtos_ao_int - implicit none - BEGIN_DOC -! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' -end From d05e4ed0b310fb083bf4318e8c7dee481dda302f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 17:46:07 +0200 Subject: [PATCH 136/337] Fix ao_basis/use_cosgtos not found in EZFIO file --- src/ao_basis/EZFIO.cfg | 3 ++- src/ao_basis/cosgtos.irp.f | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 src/ao_basis/cosgtos.irp.f diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg index a203e3f0..6ad9b998 100644 --- a/src/ao_basis/EZFIO.cfg +++ b/src/ao_basis/EZFIO.cfg @@ -76,5 +76,6 @@ interface: ezfio, provider [use_cosgtos] type: logical doc: If true, use cosgtos for AO integrals -interface: ezfio,provider +interface: ezfio default: False + diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f new file mode 100644 index 00000000..721a3e57 --- /dev/null +++ b/src/ao_basis/cosgtos.irp.f @@ -0,0 +1,33 @@ +BEGIN_PROVIDER [ logical, use_cosgtos ] + implicit none + BEGIN_DOC +! If true, use cosgtos for AO integrals + END_DOC + + logical :: has + PROVIDE ezfio_filename + if (mpi_master) then + call ezfio_has_ao_basis_use_cosgtos(has) + if (has) then +! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' + call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) + else + use_cosgtos = .False. + endif + endif + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST( use_cosgtos, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read use_cosgtos with MPI' + endif + IRP_ENDIF + +! call write_time(6) + +END_PROVIDER From 5ab6a1d7fba6fbff88ac858747783bb9292b9a89 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 1 Jun 2023 19:59:25 +0200 Subject: [PATCH 137/337] few modifs --- src/bi_ort_ints/semi_num_ints_mo.irp.f | 5 +++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- src/non_h_ints_mu/grad_squared.irp.f | 21 ++++++++++- src/non_h_ints_mu/new_grad_tc.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 6 +++ src/non_h_ints_mu/total_tc_int.irp.f | 5 +++ src/tc_scf/rh_tcscf_diis.irp.f | 39 +++++++++++++++++++- src/tc_scf/tc_scf.irp.f | 4 ++ 8 files changed, 80 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 0d727785..771d3274 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -138,10 +138,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao + endif call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -200,6 +203,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, enddo enddo + FREE int2_grad1_u12_bimo_transp + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..5a3730b3 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -79,7 +79,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 3f1a9bf5..44a6ae65 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -231,6 +231,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time0) PROVIDE j1b_type + PROVIDE int2_grad1u2_grad2u2_j1b2 do ipoint = 1, n_points_final_grid tmp1 = v_1b(ipoint) @@ -242,6 +243,8 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g enddo enddo + FREE int2_grad1u2_grad2u2_j1b2 + !if(j1b_type .eq. 0) then ! grad12_j12 = 0.d0 ! do ipoint = 1, n_points_final_grid @@ -262,6 +265,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -278,6 +282,9 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g print*, ' providing u12sq_j1bsq ...' call wall_time(time0) + ! do not free here + PROVIDE int2_u2_j1b2 + do ipoint = 1, n_points_final_grid tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -292,6 +299,7 @@ BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 + call print_memory_usage() END_PROVIDER @@ -310,6 +318,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...' call wall_time(time0) + PROVIDE int2_u_grad1u_j1b2 + PROVIDE int2_u_grad1u_x_j1b2 + do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) @@ -340,14 +351,17 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, enddo enddo + FREE int2_u_grad1u_j1b2 + FREE int2_u_grad1u_x_j1b2 + call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 + call print_memory_usage() END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC @@ -401,6 +415,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 0.d0, tc_grad_square_ao, ao_num*ao_num) + FREE int2_grad1_u12_square_ao + ! --- if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then @@ -442,6 +458,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) + + FREE int2_u2_j1b2 endif ! --- @@ -478,6 +496,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 24e7e743..499ffe9d 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -284,6 +284,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d5995ae5..8251fc71 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -176,6 +176,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() END_PROVIDER @@ -242,6 +243,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq grad12_j12 + else PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 @@ -262,6 +265,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + endif elseif(j1b_type .ge. 100) then @@ -324,6 +329,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p call wall_time(time1) print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 450bbef0..2034872a 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,8 +84,13 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif + FREE tc_grad_square_ao + FREE tc_grad_and_lapl_ao + FREE ao_two_e_coul + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 20260a95..0504373c 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -11,6 +11,7 @@ subroutine rh_tcscf_diis() integer :: i, j, it integer :: dim_DIIS, index_dim_DIIS + logical :: converged double precision :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta double precision :: tc_grad, g_save, g_delta, g_delta_th double precision :: level_shift_save, rate_th @@ -92,8 +93,9 @@ subroutine rh_tcscf_diis() PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot + converged = .false. !do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. dsqrt(thresh_tcscf))) - do while(er_DIIS .gt. dsqrt(thresh_tcscf)) + do while(.not. converged) call wall_time(t0) @@ -218,21 +220,56 @@ subroutine rh_tcscf_diis() !g_delta_th = dabs(tc_grad) ! g_delta) er_delta_th = dabs(er_DIIS) !er_delta) + converged = er_DIIS .lt. dsqrt(thresh_tcscf) + call wall_time(t1) !write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & ! it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)') & it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0 + +! Write data in JSON file + + call lock_io + if (it == 1) then + write(json_unit, json_dict_uopen_fmt) + else + write(json_unit, json_dict_close_uopen_fmt) + endif + write(json_unit, json_int_fmt) ' iteration ', it + write(json_unit, json_real_fmt) ' SCF TC Energy ', etc_tot + write(json_unit, json_real_fmt) ' E(1e) ', etc_1e + write(json_unit, json_real_fmt) ' E(2e) ', etc_2e + write(json_unit, json_real_fmt) ' E(3e) ', etc_3e + write(json_unit, json_real_fmt) ' delta Energy ', e_delta + write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS + write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf + write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 + call unlock_io + if(er_delta .lt. 0.d0) then call ezfio_set_tc_scf_bitc_energy(etc_tot) call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + write(json_unit, json_true_fmt) 'saved' + else + write(json_unit, json_false_fmt) 'saved' endif + call lock_io + if (converged) then + write(json_unit, json_true_fmtx) 'converged' + else + write(json_unit, json_false_fmtx) 'converged' + endif + call unlock_io if(qp_stop()) exit enddo + write(json_unit, json_dict_close_fmtx) + ! --- print *, ' TCSCF DIIS converged !' diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 88ddd26c..04c4f92d 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -8,6 +8,8 @@ program tc_scf implicit none + write(json_unit,json_array_open_fmt) 'tc-scf' + print *, ' starting ...' my_grid_becke = .True. @@ -57,6 +59,8 @@ program tc_scf endif + write(json_unit,json_array_close_fmtx) + call json_close end From 6971bf186cf020ce66d0bac091d06ae850bd803f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 1 Jun 2023 21:42:02 +0200 Subject: [PATCH 138/337] Accelerated multiply_poly --- src/ao_one_e_ints/pot_ao_ints.irp.f | 21 +- src/ao_two_e_ints/two_e_integrals.irp.f | 136 +------ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 52 +-- src/utils/integration.irp.f | 366 +++++++++++++------ 4 files changed, 305 insertions(+), 270 deletions(-) diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 928053ad..446bf730 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -455,10 +455,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) else do ix=0,n_pt_in X(ix) = 0.d0 @@ -469,7 +471,8 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(a-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) nx = nd do ix=0,n_pt_in @@ -479,10 +482,12 @@ recursive subroutine I_x1_pol_mult_one_e(a,c,R1x,R1xp,R2x,d,nd,n_pt_in) do ix=0,nx X(ix) *= dble(c) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny=0 call I_x1_pol_mult_one_e(a-1,c,R1x,R1xp,R2x,Y,ny,n_pt_in) - call multiply_poly(Y,ny,R1x,2,d,nd) +! call multiply_poly(Y,ny,R1x,2,d,nd) + call multiply_poly_c2(Y,ny,R1x,d,nd) endif end @@ -519,7 +524,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) do ix=0,nx X(ix) *= dble(c-1) enddo - call multiply_poly(X,nx,R2x,2,d,nd) +! call multiply_poly(X,nx,R2x,2,d,nd) + call multiply_poly_c2(X,nx,R2x,d,nd) ny = 0 do ix=0,dim Y(ix) = 0.d0 @@ -527,7 +533,8 @@ recursive subroutine I_x2_pol_mult_one_e(c,R1x,R1xp,R2x,d,nd,dim) call I_x1_pol_mult_one_e(0,c-1,R1x,R1xp,R2x,Y,ny,dim) if(ny.ge.0)then - call multiply_poly(Y,ny,R1xp,2,d,nd) +! call multiply_poly(Y,ny,R1xp,2,d,nd) + call multiply_poly_c2(Y,ny,R1xp,d,nd) endif endif end diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 835dc89a..85ff5bcf 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -975,18 +975,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_10(0) * X(ib) - d(ib+1) = d(ib+1) + B_10(1) * X(ib) - d(ib+2) = d(ib+2) + B_10(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1009,17 +998,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) endif ny=0 @@ -1038,17 +1017,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1088,18 +1057,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 @@ -1111,17 +1069,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end @@ -1150,18 +1098,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - if (nx >= 0) then - integer :: ib - do ib=0,nx - d(ib ) = d(ib ) + B_10(0) * X(ib) - d(ib+1) = d(ib+1) + B_10(1) * X(ib) - d(ib+2) = d(ib+2) + B_10(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1181,17 +1118,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_00(0) * X(ib) - d(ib+1) = d(ib+1) + B_00(1) * X(ib) - d(ib+2) = d(ib+2) + B_00(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1203,17 +1130,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + C_00(0) * Y(ib) - d(ib+1) = d(ib+1) + C_00(1) * Y(ib) - d(ib+2) = d(ib+2) + C_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1262,18 +1179,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - if (ny >= 0) then - integer :: ib - do ib=0,ny - d(ib ) = d(ib ) + D_00(0) * Y(ib) - d(ib+1) = d(ib+1) + D_00(1) * Y(ib) - d(ib+2) = d(ib+2) + D_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,D_00,d,nd) return @@ -1293,17 +1199,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_01,2,d,nd) - if (nx >= 0) then - do ib=0,nx - d(ib ) = d(ib ) + B_01(0) * X(ib) - d(ib+1) = d(ib+1) + B_01(1) * X(ib) - d(ib+2) = d(ib+2) + B_01(2) * X(ib) - enddo - - do nd = nx+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1314,17 +1210,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - if (ny >= 0) then - do ib=0,ny - d(ib ) = d(ib ) + D_00(0) * Y(ib) - d(ib+1) = d(ib+1) + D_00(1) * Y(ib) - d(ib+2) = d(ib+2) + D_00(2) * Y(ib) - enddo - - do nd = ny+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - endif + call multiply_poly_c2(Y,ny,D_00,d,nd) end select end diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index e8b56307..a72cd682 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC -! matrix element of the -L three-body operator +! matrix element of the -L three-body operator ! ! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :) END_DOC @@ -12,7 +12,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n implicit none integer :: i, j, k, l, m, n double precision :: integral, wall1, wall0 - character*(128) :: name_file + character*(128) :: name_file three_body_ints_bi_ort = 0.d0 print *, ' Providing the three_body_ints_bi_ort ...' @@ -27,12 +27,12 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) ! else - !provide x_W_ki_bi_ortho_erf_rk + !provide x_W_ki_bi_ortho_erf_rk provide mos_r_in_r_array_transp mos_l_in_r_array_transp !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,n,integral) & + !$OMP PRIVATE (i,j,k,l,m,n,integral) & !$OMP SHARED (mo_num,three_body_ints_bi_ort) !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num @@ -43,7 +43,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n do n = 1, mo_num call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) - three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral + three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral enddo enddo enddo @@ -63,7 +63,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n ! call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read") ! endif -END_PROVIDER +END_PROVIDER ! --- @@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -79,28 +79,30 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer, intent(in) :: n, l, k, m, j, i double precision, intent(out) :: integral integer :: ipoint - double precision :: weight + double precision :: weight, tmp PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + integral = integral + tmp * final_weight_at_r_vector(ipoint) enddo end subroutine give_integrals_3_body_bi_ort @@ -111,7 +113,7 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC @@ -123,13 +125,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) @@ -138,11 +140,11 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & ! + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & +! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & +! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & ! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & ! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) @@ -151,13 +153,13 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) ! + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & ! + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) & + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) ) @@ -176,7 +178,7 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS ! END_DOC @@ -188,13 +190,13 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid - weight = final_weight_at_r_vector(ipoint) + weight = final_weight_at_r_vector(ipoint) - integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & + integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) ) - integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & + integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i) & + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i) & + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) ) diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index b60e3bc1..21179dac 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -56,7 +56,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) ! - ! WARNING ::: IF fact_k is too smal then: + ! WARNING ::: IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC @@ -86,7 +86,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, !DIR$ FORCEINLINE call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center) if (fact_k < thresh) then - ! IF fact_k is too smal then: + ! IF fact_k is too smal then: ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef P_center = 0.d0 @@ -468,114 +468,6 @@ end subroutine -subroutine multiply_poly_0c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:0), c(0:nc) - double precision, intent(inout) :: d(0:0+nc) - - integer :: ic - - do ic = 0,nc - d(ic) = d(ic) + c(ic) * b(0) - enddo - - do nd = nc,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - -subroutine multiply_poly_1c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:1), c(0:nc) - double precision, intent(inout) :: d(0:1+nc) - - integer :: ic, id - if(nc < 0) return - - do ic = 0,nc - d( ic) = d( ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - enddo - - do nd = nc+1,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - - -subroutine multiply_poly_2c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:2), c(0:nc) - double precision, intent(inout) :: d(0:2+nc) - - integer :: ic, id, k - if (nc <0) return - - do ic = 0,nc - d( ic) = d( ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - d(2+ic) = d(2+ic) + c(ic) * b(2) - enddo - - do nd = nc+2,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - -subroutine multiply_poly_3c(b,c,nc,d,nd) - implicit none - BEGIN_DOC - ! Multiply two polynomials - ! D(t) += B(t)*C(t) - END_DOC - - integer, intent(in) :: nc - integer, intent(out) :: nd - double precision, intent(in) :: b(0:3), c(0:nc) - double precision, intent(inout) :: d(0:3+nc) - - integer :: ic, id - if (nc <0) return - - do ic = 1,nc - d( ic) = d(1+ic) + c(ic) * b(0) - d(1+ic) = d(1+ic) + c(ic) * b(1) - d(2+ic) = d(1+ic) + c(ic) * b(2) - d(3+ic) = d(1+ic) + c(ic) * b(3) - enddo - - do nd = nc+3,0,-1 - if (d(nd) /= 0.d0) exit - enddo - -end - - - subroutine multiply_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -604,6 +496,254 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) end + +subroutine multiply_poly_b0(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:0), c(0:nc) + double precision, intent(inout) :: d(0:nc) + + integer :: ndtmp + integer :: ic, id, k + if(nc < 0) return !False if nc>=0 + + do ic = 0,nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do nd = nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + +subroutine multiply_poly_b1(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:1), c(0:nc) + double precision, intent(inout) :: d(0:1+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ic = 1,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + + end select + + do nd = 1+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_b2(b,c,nc,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nc + integer, intent(out) :: nd + double precision, intent(in) :: b(0:2), c(0:nc) + double precision, intent(inout) :: d(0:2+nc) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nc < 0) return !False if nc>=0 + + select case (nc) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + d(2) = d(2) + c(0) * b(2) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + d(3) = d(3) + c(1) * b(2) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + c(1) * b(2) + d(4) = d(4) + c(2) * b(2) + + case default + + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ic = 2,nc + d(ic) = d(ic) + c(ic) * b(0) + c(ic-1) * b(1) + c(ic-2) * b(2) + enddo + d(nc+1) = d(nc+1) + c(nc) * b(1) + c(nc-1) * b(2) + d(nc+2) = d(nc+2) + c(nc) * b(2) + + end select + + do nd = 2+nc,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c0(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:0) + double precision, intent(inout) :: d(0:nb) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + do ib=0,nb + d(ib) = d(ib) + c(0) * b(ib) + enddo + + do nd = nb,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c1(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:1) + double precision, intent(inout) :: d(0:nb+1) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + + case default + d(0) = d(0) + c(0) * b(0) + do ib=1,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + + end select + + do nd = nb+1,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + +subroutine multiply_poly_c2(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + + + + subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) implicit none BEGIN_DOC @@ -778,11 +918,11 @@ end subroutine recentered_poly2_v subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points) BEGIN_DOC - ! + ! ! Recenter two polynomials. Special case for b=(0,0,0) - ! + ! ! (x - A)^a (x - B)^0 = (x - P + P - A)^a (x - Q + Q - B)^0 - ! = (x - P + P - A)^a + ! = (x - P + P - A)^a ! END_DOC From b9c18338960064f01bdd3e7bf6427ab3510575c9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 00:33:37 +0200 Subject: [PATCH 139/337] Optimized three_e_5_idx_exch12_bi_ort --- src/bi_ort_ints/bi_ort_ints.irp.f | 12 +- src/bi_ort_ints/three_body_ijmkl.irp.f | 162 ++++++++++++++++++++----- src/utils/integration.irp.f | 24 ++++ 3 files changed, 166 insertions(+), 32 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index ca50dd56..63b2aa8c 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -16,23 +16,27 @@ subroutine test_3e double precision :: accu, contrib,new,ref i = 1 k = 1 + n = 0 accu = 0.d0 do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num - do n = 1, mo_num - call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) - call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! do n = 1, mo_num +! call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) +! call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) contrib = dabs(new - ref) accu += contrib if(contrib .gt. 1.d-10)then print*,'pb !!' print*,i,k,j,l,m,n print*,ref,new,contrib + stop endif - enddo +! enddo enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..af2cb353 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -24,7 +24,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -45,7 +45,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -73,7 +73,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -82,7 +82,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -94,7 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -122,7 +122,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -131,7 +131,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num do m = 1, mo_num do l = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -171,7 +171,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -180,7 +180,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -192,7 +192,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -220,7 +220,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -229,7 +229,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -241,7 +241,57 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER ! --- @@ -259,38 +309,94 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, implicit none integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - three_e_5_idx_exch12_bi_ort = 0.d0 print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' call wall_time(wall0) - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + do m = 1, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP m2grad_r, m2grad_l, grad_mli, tmp_mat, orb_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + m2grad_l, 3*n_points_final_grid, & + m2grad_r, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = & + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 21179dac..b548b18a 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -484,6 +484,30 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ib, ic, id, k if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + select case (nb) + case (0) + call multiply_poly_b0(b,c,nc,d,nd) + return + case (1) + call multiply_poly_b1(b,c,nc,d,nd) + return + case (2) + call multiply_poly_b2(b,c,nc,d,nd) + return + end select + + select case (nc) + case (0) + call multiply_poly_c0(b,nb,c,d,nd) + return + case (1) + call multiply_poly_c1(b,nb,c,d,nd) + return + case (2) + call multiply_poly_c2(b,nb,c,d,nd) + return + end select + do ib=0,nb do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib) From fb5300a8e59d4dc08c4ce118317f4deffef7daba Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 08:51:04 +0200 Subject: [PATCH 140/337] Preparing for optimization of 5idx in TC --- external/qp2-dependencies | 2 +- src/bi_ort_ints/three_body_ijmkl.irp.f | 58 +--- src/bi_ort_ints/three_body_ijmkl_old.irp.f | 295 +++++++++++++++++++++ 3 files changed, 303 insertions(+), 52 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmkl_old.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index af2cb353..5220d8c7 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -245,56 +245,6 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - three_e_5_idx_exch12_bi_ort_old = 0.d0 - print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC @@ -305,6 +255,12 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! + ! Equivalent to: + ! + ! call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + ! END_DOC implicit none @@ -314,10 +270,10 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, double precision :: weight double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp diff --git a/src/bi_ort_ints/three_body_ijmkl_old.irp.f b/src/bi_ort_ints/three_body_ijmkl_old.irp.f new file mode 100644 index 00000000..105cd179 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmkl_old.irp.f @@ -0,0 +1,295 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) + three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_direct_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + three_e_5_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER + From c4612318ae9cce73c3cf668703827eb9c7bfd093 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 09:11:32 +0200 Subject: [PATCH 141/337] Optimized direct 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 62 ++- src/bi_ort_ints/three_body_ijmkl.irp.f | 500 ++++++++++++------------- 2 files changed, 297 insertions(+), 265 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 63b2aa8c..d0367f6f 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -7,7 +7,8 @@ program bi_ort_ints my_n_pt_r_grid = 10 my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_3e +! call test_3e + call test_5idx end subroutine test_3e @@ -19,15 +20,13 @@ subroutine test_3e n = 0 accu = 0.d0 do i = 1, mo_num - do k = 1, mo_num + do k = 1, mo_num do j = 1, mo_num - do l = 1, mo_num + do l = 1, mo_num do m = 1, mo_num - new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) -! do n = 1, mo_num -! call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) -! call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) + do n = 1, mo_num + call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) + call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) contrib = dabs(new - ref) accu += contrib if(contrib .gt. 1.d-10)then @@ -36,7 +35,7 @@ subroutine test_3e print*,ref,new,contrib stop endif -! enddo + enddo enddo enddo enddo @@ -46,3 +45,48 @@ subroutine test_3e end + +subroutine test_5idx + implicit none + integer :: i,k,j,l,m,n,ipoint + double precision :: accu, contrib,new,ref + i = 1 + k = 1 + n = 0 + accu = 0.d0 + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch12' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + + enddo + enddo + enddo + enddo + enddo + print*,'accu = ',accu/dble(mo_num)**5 + + +end diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 5220d8c7..1db773f1 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,8 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -12,257 +13,6 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign END_DOC - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 - - three_e_5_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - ! Equivalent to: - ! - ! call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - ! - ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral - ! - END_DOC - implicit none integer :: i, j, k, m, l double precision :: wall1, wall0 @@ -279,7 +29,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' + print *, ' Providing the three_e_5_idx_direct_bi_ort ...' call wall_time(wall0) do m = 1, mo_num @@ -322,6 +72,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo @@ -339,8 +90,8 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = & - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) enddo enddo enddo @@ -350,9 +101,246 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) + three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + double precision :: integral + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) + three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + +END_PROVIDER + +! --- + + From 00bd8e2fcc8d435a1484af065a443efee3ca3c9f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 10:34:05 +0200 Subject: [PATCH 142/337] Optimized cyclic 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 65 ++++- src/bi_ort_ints/three_body_ijmkl.irp.f | 288 +++++++------------ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 1 + 3 files changed, 152 insertions(+), 202 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index d0367f6f..eae0affe 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -59,17 +59,18 @@ subroutine test_5idx do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num - new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'direct' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif +! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'direct' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! ! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) ! contrib = dabs(new - ref) @@ -81,6 +82,50 @@ subroutine test_5idx ! stop ! endif +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif + + new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch13' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 1db773f1..9f316771 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,4 +1,3 @@ - ! --- BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] @@ -17,7 +16,6 @@ integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision :: weight double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) @@ -45,17 +43,22 @@ do i=1,mo_num do l=1,mo_num do ipoint=1, n_points_final_grid + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo enddo enddo @@ -107,240 +110,141 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - do l = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! END_DOC implicit none - double precision :: integral integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_exch23_bi_ort ...' + print *, ' Providing the three_e_5_idx_cycle_bi_ort ...' call wall_time(wall0) + do m = 1, mo_num + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rk_grad_im, rm_grad_ik, lk_grad_mi, lm_grad_ik, tmp_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -tmp_mat(k,j,l,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -tmp_mat(l,i,k,j) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = -tmp_mat(l,j,k,i) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = -tmp_mat(k,i,l,j) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - double precision :: integral - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision :: weight - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_exch13_bi_ort ...' - call wall_time(wall0) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,i,j,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,j,i,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,i,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,j,i,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_cycle_bi_ort', wall1 - wall0 END_PROVIDER ! --- + diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index a72cd682..1962c8d6 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -85,6 +85,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) PROVIDE int2_grad1_u12_bimo_t integral = 0.d0 + ! (n, l, k, m, j, i) do ipoint = 1, n_points_final_grid tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & From b2c005eccb7e05eaebb59e9dcbc3c0e771c9a87f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 11:08:30 +0200 Subject: [PATCH 143/337] Finished optimizing 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 102 +++++++++---------- src/bi_ort_ints/three_body_ijmkl.irp.f | 135 +++++++------------------ 2 files changed, 87 insertions(+), 150 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index eae0affe..5653a2e2 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -60,60 +60,60 @@ subroutine test_5idx do l = 1, mo_num do m = 1, mo_num -! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'direct' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif + + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch12' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif ! -! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'exch12' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle1' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif -! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'cycle1' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'cycle2' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif -! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'cycle2' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif - -! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. 1.d-10)then -! print*,'exch23' -! print*,i,k,j,l,m -! print*,ref,new,contrib -! stop -! endif + new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'exch23' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 9f316771..c9e88ab9 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,11 @@ ! --- - BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -16,18 +20,22 @@ integer :: i, j, k, m, l double precision :: wall1, wall0 integer :: ipoint - double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) - allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) - allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - print *, ' Providing the three_e_5_idx_direct_bi_ort ...' + print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) do m = 1, mo_num @@ -38,7 +46,8 @@ !$OMP SHARED (m,mo_num,n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP m2grad_r, m2grad_l, grad_mli, tmp_mat, orb_mat) + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi, & + !$OMP grad_mli, tmp_mat, orb_mat) !$OMP DO COLLAPSE(2) do i=1,mo_num do l=1,mo_num @@ -51,13 +60,21 @@ orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) enddo enddo @@ -84,8 +101,8 @@ !$OMP END PARALLEL DO call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - m2grad_l, 3*n_points_final_grid, & - m2grad_r, 3*n_points_final_grid, 0.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & tmp_mat, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k,l) @@ -101,83 +118,6 @@ enddo !$OMP END PARALLEL DO - enddo - - call wall_time(wall1) - print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 - -END_PROVIDER - -! --- - - BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) - double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - - print *, ' Providing the three_e_5_idx_cycle_bi_ort ...' - call wall_time(wall0) - - do m = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rk_grad_im, rm_grad_ik, lk_grad_mi, lm_grad_ik, tmp_mat) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lk_grad_mi, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -237,14 +177,11 @@ END_PROVIDER enddo enddo !$OMP END PARALLEL DO + enddo call wall_time(wall1) - print *, ' wall time for three_e_5_idx_cycle_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 END_PROVIDER -! --- - - - From 896ac96e7e7339c710b6325972e878d94003b9e5 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 11:40:21 +0200 Subject: [PATCH 144/337] Reduced memory in 5idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 5 + src/bi_ort_ints/three_body_ijmkl.irp.f | 139 ++++++++++++++++--------- 2 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 5653a2e2..f7a42f37 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -9,6 +9,11 @@ program bi_ort_ints touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e call test_5idx +! call test_5idx2 +end + +subroutine test_5idx2 + PROVIDE three_e_5_idx_cycle_2_bi_ort end subroutine test_3e diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index c9e88ab9..bd669163 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -24,12 +24,6 @@ double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp @@ -40,14 +34,15 @@ do m = 1, mo_num + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,l,ipoint) & !$OMP SHARED (m,mo_num,n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi, & - !$OMP grad_mli, tmp_mat, orb_mat) + !$OMP grad_mli, orb_mat) !$OMP DO COLLAPSE(2) do i=1,mo_num do l=1,mo_num @@ -60,22 +55,6 @@ orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - enddo enddo enddo @@ -100,6 +79,41 @@ enddo !$OMP END PARALLEL DO + deallocate(orb_mat,grad_mli) + + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lm_grad_ik, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -118,6 +132,52 @@ enddo !$OMP END PARALLEL DO + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(lm_grad_ik) + + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & lk_grad_mi, 3*n_points_final_grid, & rm_grad_ik, 3*n_points_final_grid, 0.d0, & @@ -128,10 +188,10 @@ do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -tmp_mat(k,j,l,i) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -tmp_mat(l,i,k,j) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = -tmp_mat(l,j,k,i) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = -tmp_mat(k,i,l,j) + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) + three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) enddo enddo enddo @@ -158,26 +218,9 @@ enddo !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,i,j,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,j,i,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,i,j,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,j,i,k) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) enddo call wall_time(wall1) From 81b7751b00f54a988e2df30fb92edc98f0e49474 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 16:10:04 +0200 Subject: [PATCH 145/337] Fix bug in number of args --- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 1f3bebc2..b669025e 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -198,7 +198,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ allocate (bounds(2,nbuckets)) do isample=1,nbuckets eta = 1.d0/dble(nbuckets) * dble(isample) - ieta = binary_search(waccu,eta,Nabc,ileft,iright) + ieta = binary_search(waccu,eta,Nabc) bounds(1,isample) = ileft bounds(2,isample) = ieta ileft = ieta+1 From 6d01eb42ca24a4265710b20913d64c9fb3117298 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:16:39 +0200 Subject: [PATCH 146/337] print mem details --- src/bi_ort_ints/three_body_ijm.irp.f | 7 +++++++ src/bi_ort_ints/three_body_ijmk.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 6 ++++++ src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 1 + src/non_h_ints_mu/tc_integ.irp.f | 2 ++ src/non_h_ints_mu/total_tc_int.irp.f | 4 +--- 6 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index 4d21cb93..b34638b8 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -49,6 +49,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -102,6 +103,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -155,6 +157,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -208,6 +211,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -261,6 +265,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -306,6 +311,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -359,6 +365,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_ call wall_time(wall1) print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 5afd49ab..95b57e37 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -43,6 +43,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -90,6 +91,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -137,6 +139,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -184,6 +187,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -230,6 +234,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -277,6 +282,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..507408e5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -44,6 +44,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -93,6 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -142,6 +144,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -191,6 +194,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -240,6 +244,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -289,6 +294,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 + call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 5a3730b3..f82e8725 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n call wall_time(wall1) print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0 + call print_memory_usage() ! if(write_three_body_ints_bi_ort)then ! print*,'Writing three_body_ints_bi_ort on disk ...' ! call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index 8251fc71..b2c0df31 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -100,6 +100,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2034872a..afa10305 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -84,9 +84,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao enddo endif - FREE tc_grad_square_ao - FREE tc_grad_and_lapl_ao - FREE ao_two_e_coul + FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 From 107cc3f2fbfb13fdea4bad734c8bc5d11d9d8df8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:19:25 +0200 Subject: [PATCH 147/337] fixed bug in TC-VAR --- src/tc_bi_ortho/tc_utils.irp.f | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index f8f648e8..737c393b 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -38,15 +38,16 @@ subroutine write_tc_var() implicit none integer :: i, j, k - double precision :: hmono, htwoe, hthree, htot + double precision :: hmono, htwoe, hthree, htot_1j, htot_j1 double precision :: SIGMA_TC do k = 1, n_states SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) - SIGMA_TC = SIGMA_TC + htot * htot + call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo print *, " state : ", k From 4cc8dae42010e062f82ace4373e2d5927e9074b0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 20:32:31 +0200 Subject: [PATCH 148/337] Improve 5idx --- scripts/qp_import_trexio.py | 15 ++ scripts/utility/qp_bitmasks.py | 6 +- src/ao_basis/cosgtos.irp.f | 3 +- src/bi_ort_ints/bi_ort_ints.irp.f | 115 ++++++------- src/bi_ort_ints/three_body_ijmkl.irp.f | 10 +- src/tc_bi_ortho/31.tc_bi_ortho.bats | 34 ++-- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 89 +++++----- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 +- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 180 +++++++++++---------- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 3 +- 10 files changed, 247 insertions(+), 211 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 89096387..2c829f5c 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -17,6 +17,7 @@ import numpy as np from functools import reduce from ezfio import ezfio from docopt import docopt +import qp_bitmasks try: import trexio @@ -453,6 +454,20 @@ def write_ezfio(trexio_filename, filename): else: print("None") + print("Determinant\t\t...\t", end=' ') + alpha = [ i for i in range(num_alpha) ] + beta = [ i for i in range(num_beta) ] + if trexio.has_mo_spin(trexio_file): + spin = trexio.read_mo_spin(trexio_file) + beta = [ i for i in range(mo_num) if spin[i] == 1 ] + beta = [ beta[i] for i in range(num_beta) ] + + alpha = qp_bitmasks.BitMask(alpha) + beta = qp_bitmasks.BitMask(beta ) + print(alpha) + print(beta) + print("OK") + diff --git a/scripts/utility/qp_bitmasks.py b/scripts/utility/qp_bitmasks.py index 38aa48d7..11965b72 100644 --- a/scripts/utility/qp_bitmasks.py +++ b/scripts/utility/qp_bitmasks.py @@ -22,7 +22,7 @@ def int_to_string(s): assert s>=0 AssertionError """ - assert type(s) in (int, long) + assert type(s) == int assert s>=0 return '{s:0b}'.format(s=s) @@ -62,7 +62,7 @@ def int_to_bitmask(s,bit_kind_size=BIT_KIND_SIZE): ['1111111111111111111111111111111111111111111111111111111111110110'] >>> """ - assert type(s) in (int, long) + assert type(s) == int if s < 0: s = s + (1 << bit_kind_size) return ['{s:0{width}b}'.format(s=s,width=bit_kind_size)] @@ -104,7 +104,7 @@ class BitMask(object): return self._data_int[i] def __setitem__(self,i,value): - if type(value) in (int,long): + if type(value) == int : self._data_int[i] = value elif type(value) == str: s = string_to_bitmask(value,bit_kind_size=self.bit_kind_size)[0] diff --git a/src/ao_basis/cosgtos.irp.f b/src/ao_basis/cosgtos.irp.f index 721a3e57..dfa7d6b9 100644 --- a/src/ao_basis/cosgtos.irp.f +++ b/src/ao_basis/cosgtos.irp.f @@ -6,13 +6,14 @@ BEGIN_PROVIDER [ logical, use_cosgtos ] logical :: has PROVIDE ezfio_filename + use_cosgtos = .False. if (mpi_master) then call ezfio_has_ao_basis_use_cosgtos(has) if (has) then ! write(6,'(A)') '.. >>>>> [ IO READ: use_cosgtos ] <<<<< ..' call ezfio_get_ao_basis_use_cosgtos(use_cosgtos) else - use_cosgtos = .False. + call ezfio_set_ao_basis_use_cosgtos(use_cosgtos) endif endif IRP_IF MPI_DEBUG diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index f7a42f37..42bbe315 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -55,6 +55,7 @@ subroutine test_5idx implicit none integer :: i,k,j,l,m,n,ipoint double precision :: accu, contrib,new,ref + double precision, external :: three_e_5_idx_exch12_bi_ort i = 1 k = 1 n = 0 @@ -64,18 +65,21 @@ subroutine test_5idx do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num +! if (dabs(three_e_5_idx_direct_bi_ort(m,l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,i,k,j)) > 1.d-10) then +! stop +! endif - new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'direct' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - +! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'direct' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) contrib = dabs(new - ref) @@ -86,51 +90,52 @@ subroutine test_5idx print*,ref,new,contrib stop endif + +! +! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle1' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'cycle2' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch23' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif +! +! new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. 1.d-10)then +! print*,'exch13' +! print*,i,k,j,l,m +! print*,ref,new,contrib +! stop +! endif ! - new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'cycle1' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_cycle_2_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'cycle2' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch23_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch23' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - - new = three_e_5_idx_exch13_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch13_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch13' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd669163..7b39235b 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -1,7 +1,11 @@ ! --- +double precision function three_e_5_idx_exch12_bi_ort(m,l,i,k,j) result(integral) + implicit none + integer, intent(in) :: m,l,j,k,i + integral = three_e_5_idx_direct_bi_ort(m,l,j,k,i) +end BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)] @@ -14,6 +18,8 @@ ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC implicit none @@ -72,7 +78,6 @@ do j = 1, mo_num do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo @@ -125,7 +130,6 @@ do j = 1, mo_num do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) enddo enddo enddo diff --git a/src/tc_bi_ortho/31.tc_bi_ortho.bats b/src/tc_bi_ortho/31.tc_bi_ortho.bats index f5b9d8c0..93bed2ab 100644 --- a/src/tc_bi_ortho/31.tc_bi_ortho.bats +++ b/src/tc_bi_ortho/31.tc_bi_ortho.bats @@ -4,46 +4,50 @@ source $QP_ROOT/tests/bats/common.bats.sh source $QP_ROOT/quantum_package.rc +function get_e() { + grep "eigval_right_tc_bi_orth" $1 | cut -d '=' -f 2 | xargs +} + function run_Ne() { - qp set_file Ne_tc_scf - qp run cisd - qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out + qp set_file Ne_tc_scf + qp run cisd + qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out eref=-128.77020441279302 - energy="$(grep "eigval_right_tc_bi_orth =" Ne_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e Ne_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "Ne" { - run_Ne + run_Ne } function run_C() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out eref=-37.757536149952514 - energy="$(grep "eigval_right_tc_bi_orth =" C_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e C_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "C" { - run_C + run_C } function run_O() { - qp set_file C_tc_scf - qp run cisd - qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out + qp set_file C_tc_scf + qp run cisd + qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out eref=-74.908518517716161 - energy="$(grep "eigval_right_tc_bi_orth =" O_tc_scf.cisd_tc_bi_ortho.out)" + energy=$(get_e O_tc_scf.cisd_tc_bi_ortho.out) eq $energy $eref 1e-6 } @test "O" { - run_O + run_O } diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 6abb6b78..49977f37 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -32,28 +32,28 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) if(Ne(1)+Ne(2).ge.3)then !! ! alpha/alpha/beta three-body do i = 1, Ne(1) - ii = occ(i,1) + ii = occ(i,1) do j = i+1, Ne(1) - jj = occ(j,1) + jj = occ(j,1) do m = 1, Ne(2) - mm = occ(m,2) -! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR -! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + mm = occ(m,2) +! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR +! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR hthree += direct_int - exchange_int enddo enddo enddo - + ! beta/beta/alpha three-body do i = 1, Ne(2) - ii = occ(i,2) + ii = occ(i,2) do j = i+1, Ne(2) - jj = occ(j,2) + jj = occ(j,2) do m = 1, Ne(1) - mm = occ(m,1) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + mm = occ(m,1) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) hthree += direct_int - exchange_int enddo @@ -64,10 +64,10 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) do i = 1, Ne(1) ii = occ(i,1) ! 1 do j = i+1, Ne(1) - jj = occ(j,1) ! 2 + jj = occ(j,1) ! 2 do m = j+1, Ne(1) - mm = occ(m,1) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR + mm = occ(m,1) ! 3 +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -80,7 +80,7 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) jj = occ(j,2) ! 2 do m = j+1, Ne(2) mm = occ(m,2) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS enddo enddo @@ -96,7 +96,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -110,7 +110,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk integer :: degree,exc(0:2,2,2) integer :: h1, p1, h2, p2, s1, s2 - double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin + double precision :: direct_int,phase,exchange_int,three_e_single_parrallel_spin double precision :: sym_3_e_int_from_6_idx_tensor integer :: other_spin(2) integer(bit_kind) :: key_j_core(Nint,2),key_i_core(Nint,2) @@ -142,26 +142,26 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! alpha/alpha/beta three-body ! print*,'IN SLAT RULES' if(Ne(1)+Ne(2).ge.3)then - ! hole of spin s1 :: contribution from purely other spin + ! hole of spin s1 :: contribution from purely other spin ispin = other_spin(s1) ! ispin is the other spin than s1 - do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 - ii = occ(i,ispin) - do j = i+1, Ne(ispin) ! j has the same spin than s1 - jj = occ(j,ispin) + do i = 1, Ne(ispin) ! i is the orbitals of the other spin than s1 + ii = occ(i,ispin) + do j = i+1, Ne(ispin) ! j has the same spin than s1 + jj = occ(j,ispin) ! is == ispin in ::: s1 is is s1 is is s1 is is s1 is is ! < h1 j i | p1 j i > - < h1 j i | p1 i j > - ! - direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) - exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) + ! + direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) + exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) hthree += direct_int - exchange_int enddo enddo - + ! hole of spin s1 :: contribution from mixed other spin / same spin - do i = 1, Ne(ispin) ! other spin - ii = occ(i,ispin) ! other spin - do j = 1, Ne(s1) ! same spin - jj = occ(j,s1) ! same spin + do i = 1, Ne(ispin) ! other spin + ii = occ(i,ispin) ! other spin + do j = 1, Ne(s1) ! same spin + jj = occ(j,s1) ! same spin direct_int = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1) exchange_int = three_e_4_idx_exch13_bi_ort(jj,ii,p1,h1) ! < h1 j i | p1 j i > - < h1 j i | j p1 i > @@ -174,8 +174,8 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) - hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR +! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) + hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo endif @@ -191,7 +191,7 @@ subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC @@ -235,29 +235,30 @@ subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) - + if(Ne(1)+Ne(2).ge.3)then - if(s1==s2)then ! same spin excitation + if(s1==s2)then ! same spin excitation ispin = other_spin(s1) do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1) mm = occ(m,ispin) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) hthree += direct_int - exchange_int enddo - do m = 1, Ne(s1) ! pure contribution from s1 + do m = 1, Ne(s1) ! pure contribution from s1 mm = occ(m,s1) hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1) - enddo - else ! different spin excitation + enddo + else ! different spin excitation do m = 1, Ne(s1) - mm = occ(m,s1) ! - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + mm = occ(m,s1) ! + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int enddo do m = 1, Ne(s2) - mm = occ(m,s2) ! + mm = occ(m,s2) ! direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) hthree += direct_int - exchange_int diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 3fd2576a..882470ed 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -13,8 +13,7 @@ subroutine provide_all_three_ints_bi_ortho PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort endif if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + PROVIDE three_e_5_idx_direct_bi_ort elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then PROVIDE normal_two_body_bi_orth endif diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 2d6bfb27..12bbbec0 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -2,17 +2,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: hmono, htwoe, hthree, htot integer :: occ(Nint*bit_kind_size,2) @@ -39,8 +39,8 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then @@ -53,11 +53,11 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, endif endif else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) if(three_body_h_tc.and.elec_num.gt.2)then if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then @@ -112,72 +112,76 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) !DIR$ FORCEINLINE call bitstring_to_list_ab(particle, occ_particle, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha - ASSERT (tmp(2) == nexc(2)) ! Number of particle beta + ASSERT (tmp(2) == nexc(2)) ! Number of particle beta !DIR$ FORCEINLINE call bitstring_to_list_ab(hole, occ_hole, tmp, N_int) ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha - ASSERT (tmp(2) == nexc(2)) ! Number of holes beta + ASSERT (tmp(2) == nexc(2)) ! Number of holes beta if(s1==s2.and.s1==1)then !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double exc - hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 1 ! i==alpha ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 2 ! i==beta ==> alpha/alpha/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - elseif(s1==s2.and.s1==2)then + elseif(s1==s2.and.s1==2)then !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double exc hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! ispin = 2 ! i==beta ==> pure same spin terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ipart=occ_particle(i,ispin) hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1) ihole=occ_hole(i,ispin) hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1) enddo ispin = 1 ! i==alpha==> beta/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and (h2,p2) ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ipart,p2,h1,p1,h2) hthree += direct_int - exchange_int ihole=occ_hole(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1) - exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(ihole,p2,h1,p1,h2) hthree -= direct_int - exchange_int enddo - else ! (h1,p1) == alpha/(h2,p2) == beta + else ! (h1,p1) == alpha/(h2,p2) == beta hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1) - if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant - !!!!!!!! the matrix element is already exact - !!!!!!!! else you need to take care of holes and particles + if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant + !!!!!!!! the matrix element is already exact + !!!!!!!! else you need to take care of holes and particles !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!! - ispin = 1 ! i==alpha ==> alpha/beta/alpha terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 1 ! i==alpha ==> alpha/beta/alpha terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h1,p1) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -188,8 +192,8 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1) hthree -= direct_int - exchange_int enddo - ispin = 2 ! i==beta ==> alpha/beta/beta terms - do i = 1, nexc(ispin) ! number of couple of holes/particles + ispin = 2 ! i==beta ==> alpha/beta/beta terms + do i = 1, nexc(ispin) ! number of couple of holes/particles ! exchange between (h2,p2) and i ipart=occ_particle(i,ispin) direct_int = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1) @@ -207,7 +211,7 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/beta double excitations ! ! from contraction with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha END_DOC @@ -222,16 +226,16 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_ab = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) - do hh2 = 1, n_act_orb !! beta - h2 = list_act(hh2) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) + do hh2 = 1, n_act_orb !! beta + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) - do pp2 = 1, n_act_orb !! beta + do pp2 = 1, n_act_orb !! beta p2 = list_act(pp2) call give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) eff_2_e_from_3_e_ab(p2,p1,h2,h1) = contrib @@ -242,25 +246,25 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == beta contrib = 0.d0 - do mm = 1, Ne(1) !! alpha + do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and m exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -268,7 +272,7 @@ subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h2,p2) and m exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1) contrib += direct_int - exchange_int @@ -278,11 +282,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for alpha/alpha double excitations ! ! from contractionelec_alpha_num with HF density = a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -297,13 +301,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_aa = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! alpha - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! alpha + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! alpha - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! alpha p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! alpha @@ -317,20 +321,20 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int - !! h1,p1 == alpha + !! h1,p1 == alpha !! h2,p2 == alpha contrib = 0.d0 do mm = 1, Ne(1) !! alpha ==> pure parallele spin contribution @@ -340,9 +344,10 @@ subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(2) !! beta m = occ(mm,2) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -351,11 +356,11 @@ end BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC -! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations +! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective Two-electron operator for beta/beta double excitations ! ! from contractionelec_beta_num with HF density = a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta ! -! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill +! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill ! ! |||| h2>h1, p2>p1 |||| END_DOC @@ -370,13 +375,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, eff_2_e_from_3_e_bb = 100000000.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb !! beta - h1 = list_act(hh1) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb !! beta + h1 = list_act(hh1) do hh2 = hh1+1, n_act_orb !! beta - h2 = list_act(hh2) + h2 = list_act(hh2) do pp1 = 1, n_act_orb !! beta p1 = list_act(pp1) do pp2 = pp1+1, n_act_orb !! beta @@ -390,18 +395,18 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL -END_PROVIDER +END_PROVIDER subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) implicit none - BEGIN_DOC + BEGIN_DOC ! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta ! ! on top of a determinant whose occupied orbitals is in (occ, Ne) END_DOC integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2) double precision, intent(out) :: contrib - integer :: mm,m + integer :: mm,m double precision :: direct_int, exchange_int !! h1,p1 == beta !! h2,p2 == beta @@ -413,9 +418,10 @@ subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib) do mm = 1, Ne(1) !! alpha m = occ(mm,1) - direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) + direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) ! exchange between (h1,p1) and (h2,p2) - exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) +! exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1) + exchange_int = three_e_5_idx_direct_bi_ort(mm,p2,h1,p1,h2) contrib += direct_int - exchange_int enddo end @@ -424,17 +430,17 @@ end subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! - ! + ! ! Non hermitian !! END_DOC use bitmasks implicit none - integer, intent(in) :: Nint + integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2) double precision, intent(out) :: htot double precision :: hmono, htwoe @@ -461,17 +467,17 @@ subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! opposite spin two-body + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + ! same spin two-body + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) endif htwoe *= phase - htot = htwoe + htot = htwoe end diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e4f7ca93..e725d8e5 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -107,5 +107,6 @@ double precision function three_e_double_parrallel_spin(m,l,j,k,i) three_e_double_parrallel_spin = three_e_5_idx_direct_bi_ort(m,l,j,k,i) ! direct three_e_double_parrallel_spin += three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) & ! two cyclic permutations - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) & ! two first exchange - - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange +! - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) ! last exchange + - three_e_5_idx_direct_bi_ort(m,l,i,k,j) ! last exchange end From 82b2d8bd98e9f3d543b74f766553d28166486094 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:48:23 +0200 Subject: [PATCH 149/337] avoid long name in cosgtos --- .../two_e_Coul_integrals_cosgtos.irp.f | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f index 527a98d5..ea9ff009 100644 --- a/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f +++ b/src/ao_two_e_ints/two_e_Coul_integrals_cosgtos.irp.f @@ -29,14 +29,14 @@ double precision function ao_two_e_integral_cosgtos(i, j, k, l) complex*16 :: integral5, integral6, integral7, integral8 complex*16 :: integral_tot - double precision :: ao_two_e_integral_cosgtos_schwartz_accel + double precision :: ao_2e_cosgtos_schwartz_accel complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then !print *, ' with shwartz acc ' - ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + ao_two_e_integral_cosgtos = ao_2e_cosgtos_schwartz_accel(i, j, k, l) else !print *, ' without shwartz acc ' @@ -294,7 +294,7 @@ end function ao_two_e_integral_cosgtos ! --- -double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) +double precision function ao_2e_cosgtos_schwartz_accel(i, j, k, l) BEGIN_DOC ! integral of the AO basis or (ij|kl) @@ -329,7 +329,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) complex*16 :: ERI_cosgtos complex*16 :: general_primitive_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + ao_2e_cosgtos_schwartz_accel = 0.d0 dim1 = n_pt_max_integrals @@ -519,8 +519,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -698,8 +697,7 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 - ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & - + coef4 * 2.d0 * real(integral_tot) + ao_2e_cosgtos_schwartz_accel = ao_2e_cosgtos_schwartz_accel + coef4 * 2.d0 * real(integral_tot) enddo ! s enddo ! r enddo ! q @@ -709,11 +707,11 @@ double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) deallocate(schwartz_kl) -end function ao_two_e_integral_cosgtos_schwartz_accel +end function ao_2e_cosgtos_schwartz_accel ! --- -BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] +BEGIN_PROVIDER [ double precision, ao_2e_cosgtos_schwartz, (ao_num,ao_num)] BEGIN_DOC ! Needed to compute Schwartz inequalities @@ -723,16 +721,16 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,a integer :: i, k double precision :: ao_two_e_integral_cosgtos - ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + ao_2e_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) - !$OMP PARALLEL DO PRIVATE(i,k) & - !$OMP DEFAULT(NONE) & - !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_2e_cosgtos_schwartz) & !$OMP SCHEDULE(dynamic) do i = 1, ao_num do k = 1, i - ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) - ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + ao_2e_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_2e_cosgtos_schwartz(k,i) = ao_2e_cosgtos_schwartz(i,k) enddo enddo !$OMP END PARALLEL DO From cab3b12b9b397933ca438717846d28d3164d4804 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 2 Jun 2023 20:55:51 +0200 Subject: [PATCH 150/337] minor modif in names --- src/tc_bi_ortho/tc_utils.irp.f | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index e0f29eb8..9023e2f0 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -45,8 +45,8 @@ subroutine write_tc_var() SIGMA_TC = 0.d0 do j = 2, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot_1j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot_j1) SIGMA_TC = SIGMA_TC + htot_1j * htot_j1 enddo From 072bea8041a5414da00bc8ddc001186c3c9ff269 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 3 Jun 2023 22:12:30 +0200 Subject: [PATCH 151/337] Improve 4idx --- src/bi_ort_ints/bi_ort_ints.irp.f | 186 +++++- src/bi_ort_ints/three_body_ijm.irp.f | 14 +- src/bi_ort_ints/three_body_ijmk.irp.f | 686 +++++++++++++-------- src/bi_ort_ints/three_body_ijmk_old.irp.f | 290 +++++++++ src/bi_ort_ints/three_body_ijmkl.irp.f | 299 +++++---- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 +- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 7 +- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 6 +- 8 files changed, 1071 insertions(+), 420 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmk_old.irp.f diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..bb0424cd 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -8,8 +8,9 @@ program bi_ort_ints my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e - call test_5idx -! call test_5idx2 +! call test_5idx +! call test_5idx2 + call test_4idx end subroutine test_5idx2 @@ -145,3 +146,184 @@ subroutine test_5idx end + +! --- + +subroutine test_4idx() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-5 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_exch12_bi_ort_old +! PROVIDE three_e_4_idx_exch12_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) +! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_exch12_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + +! PROVIDE three_e_4_idx_cycle_2_bi_ort_old +! PROVIDE three_e_4_idx_cycle_2_bi_ort +! +! accu = 0.d0 +! do i = 1, mo_num +! do j = 1, mo_num +! do k = 1, mo_num +! do l = 1, mo_num +! +! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) +! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) +! contrib = dabs(new - ref) +! accu += contrib +! if(contrib .gt. thr) then +! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' +! print*, l, k, j, i +! print*, ref, new, contrib +! stop +! endif +! +! enddo +! enddo +! enddo +! enddo +! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + ! --- + + return +end diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index b34638b8..ae100fb5 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -23,11 +23,11 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,m,integral) & - !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,m,integral) & + !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort) + !$OMP DO SCHEDULE (dynamic) do i = 1, mo_num do j = 1, mo_num do m = j, mo_num @@ -36,8 +36,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do i = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 95b57e37..39a31751 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -1,287 +1,467 @@ ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_4_idx_direct_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_direct_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_direct_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) - three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:) + double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp_3d(:,:,:) - three_e_4_idx_cycle_1_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...' + print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) provide mos_r_in_r_array_transp mos_l_in_r_array_transp - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP DO COLLAPSE(2) do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0 - call print_memory_usage() + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) -END_PROVIDER + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) -! -- + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) -BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_cycle_2_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) - three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch23_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) - three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch13_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch13_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - END_DOC - - implicit none - integer :: i, j, k, m - double precision :: integral, wall1, wall0 - - three_e_4_idx_exch12_bi_ort = 0.d0 - print *, ' Providing the three_e_4_idx_exch12_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,integral) & - !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) - three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral - enddo + tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp3) + deallocate(tmp4) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp7(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp7) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp7) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp5) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0 + print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5, n_points_final_grid, tmp6, n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & + , 0.d0, tmp_4d, mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 call print_memory_usage() END_PROVIDER diff --git a/src/bi_ort_ints/three_body_ijmk_old.irp.f b/src/bi_ort_ints/three_body_ijmk_old.irp.f new file mode 100644 index 00000000..1a67f35b --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_old.irp.f @@ -0,0 +1,290 @@ + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_direct_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_direct_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral) + three_e_4_idx_direct_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_direct_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_1_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_1_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral) + three_e_4_idx_cycle_1_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_1_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! -- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_cycle_2_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_cycle_2_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral) + three_e_4_idx_cycle_2_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_cycle_2_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch23_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch23_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral) + three_e_4_idx_exch23_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch23_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch13_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch13_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral) + three_e_4_idx_exch13_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch13_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m + double precision :: integral, wall1, wall0 + + three_e_4_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_4_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,integral) & + !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral) + three_e_4_idx_exch12_bi_ort_old(m,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_4_idx_exch12_bi_ort_old', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index d67e1434..3e4412a3 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -19,17 +19,17 @@ end ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint + integer :: i, j, k, m, l + integer :: ipoint + double precision :: wall1, wall0 double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) double precision, allocatable :: tmp_mat(:,:,:,:) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) provide mos_r_in_r_array_transp mos_l_in_r_array_transp @@ -38,201 +38,196 @@ end print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + do m = 1, mo_num - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) - allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP grad_mli, orb_mat) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) - enddo + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + + enddo + enddo enddo - enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP END DO - !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + deallocate(orb_mat,grad_mli) + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) - - enddo + rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + !$OMP END DO + !$OMP END PARALLEL + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - deallocate(lm_grad_ik) - - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo + !$OMP END PARALLEL DO + + deallocate(lm_grad_ik) + + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP lk_grad_mi) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + + enddo + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL DO - - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) + !$OMP END PARALLEL DO + + deallocate(lk_grad_mi) + deallocate(rm_grad_ik) + deallocate(rk_grad_im) enddo + deallocate(tmp_mat) call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 882470ed..a2077f0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -9,8 +9,7 @@ subroutine provide_all_three_ints_bi_ortho PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort endif if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif if(.not.double_normal_ord.and.three_e_5_idx_term)then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7178d6d9..9719a6e7 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -243,7 +243,9 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree, do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO + ! use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree += direct_int - exchange_int enddo else !! ispin NE to ispin_fock @@ -322,7 +324,8 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N do j = 1, nb jj = occ(j,other_spin) direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR - exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR + ! TODO use transpose + exchange_int = three_e_4_idx_exch13_bi_ort(iorb,jj,p_fock,h_fock) ! USES 4-IDX TENSOR hthree -= direct_int - exchange_int enddo else !! ispin NE to ispin_fock diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index e725d8e5..3180d946 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -96,9 +96,11 @@ double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m three_e_single_parrallel_spin = three_e_4_idx_direct_bi_ort(m,j,k,i) ! direct - three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_2_bi_ort(m,j,k,i) & ! two cyclic permutations + three_e_single_parrallel_spin += three_e_4_idx_cycle_1_bi_ort(m,j,k,i) + three_e_4_idx_cycle_1_bi_ort(j,m,k,i) & ! two cyclic permutations - three_e_4_idx_exch23_bi_ort(m,j,k,i) - three_e_4_idx_exch13_bi_ort(m,j,k,i) & ! two first exchange - - three_e_4_idx_exch12_bi_ort(m,j,k,i) ! last exchange + - three_e_4_idx_exch13_bi_ort(j,m,k,i) ! last exchange + ! TODO + ! use transpose end double precision function three_e_double_parrallel_spin(m,l,j,k,i) From a791a28523b787618d571947452fffbc4e7340c6 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:19:34 +0200 Subject: [PATCH 152/337] working on memory footprint --- .../grid_becke_vector.irp.f | 18 +++- src/bi_ort_ints/semi_num_ints_mo.irp.f | 23 ++++-- src/bi_ort_ints/three_body_ijmk.irp.f | 82 +++++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 6 +- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 6 -- src/tc_bi_ortho/slater_tc_opt.irp.f | 50 +++++++---- 7 files changed, 121 insertions(+), 72 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index fd185641..8982fe83 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -62,20 +62,30 @@ END_PROVIDER enddo enddo + FREE grid_points_per_atom + FREE final_weight_at_r + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] - implicit none + BEGIN_DOC -! Transposed final_grid_points + ! Transposed final_grid_points END_DOC + implicit none integer :: i,j - do j=1,3 - do i=1,n_points_final_grid + + do j = 1, 3 + do i = 1, n_points_final_grid final_grid_points_transp(i,j) = final_grid_points(j,i) enddo enddo + END_PROVIDER + +! --- + + diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 771d3274..6354b393 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -124,6 +124,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo + FREE int2_grad1_u12_ao_test + else PROVIDE int2_grad1_u12_ao @@ -153,14 +155,14 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)] implicit none - integer :: ipoint + integer :: ipoint double precision :: wall0, wall1 PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - !print *, ' providing int2_grad1_u12_bimo_transp' - !call wall_time(wall0) + print *, ' providing int2_grad1_u12_bimo_transp' + call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -178,8 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - !call wall_time(wall1) - !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call wall_time(wall1) + print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + call print_memory_usage() END_PROVIDER @@ -188,7 +191,11 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] implicit none - integer :: i, j, ipoint + integer :: i, j, ipoint + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -205,6 +212,10 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp + call wall_time(wall1) + print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 39a31751..ee7e88ef 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -30,10 +30,10 @@ implicit none integer :: ipoint, i, j, k, l, m double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:), tmp4(:,:,:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:) - double precision, allocatable :: tmp7(:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) double precision, allocatable :: tmp_3d(:,:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' @@ -47,7 +47,6 @@ allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp4(n_points_final_grid,3,mo_num,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -55,7 +54,7 @@ !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3, tmp4) + !$OMP tmp1, tmp2, tmp3) !$OMP DO COLLAPSE(2) do i = 1, mo_num do l = 1, mo_num @@ -69,13 +68,9 @@ tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - - tmp4(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp4(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) enddo enddo enddo @@ -99,7 +94,7 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp4, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp1) @@ -116,8 +111,30 @@ enddo !$OMP END PARALLEL DO + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp2) @@ -135,11 +152,10 @@ !$OMP END PARALLEL DO call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp4, 3*n_points_final_grid & + , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) deallocate(tmp3) - deallocate(tmp4) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -155,8 +171,6 @@ - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -199,26 +213,26 @@ allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp7(n_points_final_grid,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp7) + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) !$OMP DO do i = 1, mo_num do ipoint = 1, n_points_final_grid - tmp7(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO !$OMP END PARALLEL - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) do m = 1, mo_num @@ -227,13 +241,13 @@ !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) enddo @@ -243,7 +257,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp7, n_points_final_grid, tmp5, n_points_final_grid & + , tmp5, n_points_final_grid, tmp4, n_points_final_grid & , 0.d0, tmp_3d, mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -264,13 +278,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP mos_l_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do k = 1, mo_num do j = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) @@ -281,7 +295,7 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & , 0.d0, tmp_3d, mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) @@ -296,7 +310,7 @@ enddo - deallocate(tmp7) + deallocate(tmp5) deallocate(tmp_3d) @@ -309,13 +323,13 @@ !$OMP SHARED (mo_num, n_points_final_grid, i, & !$OMP mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp4) !$OMP DO COLLAPSE(2) do j = 1, mo_num do m = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -326,12 +340,12 @@ !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) enddo - deallocate(tmp5) + deallocate(tmp4) ! !$OMP PARALLEL DO PRIVATE(i,j,k,m) diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 42130575..8667683e 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -46,6 +46,8 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mos_r_in_r_array_transp(i,j) = mos_r_in_r_array(j,i) enddo enddo + + FREE mos_r_in_r_array END_PROVIDER @@ -116,7 +118,7 @@ end subroutine give_all_mos_l_at_r ! --- -BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)] +BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,mo_num)] BEGIN_DOC ! mos_l_in_r_array_transp(i,j) = value of the jth mo on the ith grid point @@ -130,6 +132,8 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo mos_l_in_r_array_transp(i,j) = mos_l_in_r_array(j,i) enddo enddo + + FREE mos_l_in_r_array END_PROVIDER diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index ed75c882..f9bda058 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -54,11 +54,13 @@ subroutine run_cipsi_tc implicit none - if (.not.is_zmq_slave) then + if (.not. is_zmq_slave) then + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num .ge. 3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif ! --- diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index cc01d144..f8e310df 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -24,9 +24,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE N_int - print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -115,9 +112,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call wall_time(wall1) print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a2077f0f..42c59308 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,24 +1,38 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - if(three_e_3_idx_term)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - endif - if(three_e_4_idx_term)then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort - endif - if(.not.double_normal_ord.and.three_e_5_idx_term)then - PROVIDE three_e_5_idx_direct_bi_ort - elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then - PROVIDE normal_two_body_bi_orth - endif + +! --- + +subroutine provide_all_three_ints_bi_ortho() + + BEGIN_DOC + ! routine that provides all necessary three-electron integrals + END_DOC + + implicit none + + if(three_body_h_tc) then + + if(three_e_3_idx_term) then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + + if(three_e_4_idx_term) then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + endif + + if(.not. double_normal_ord. and. three_e_5_idx_term) then + PROVIDE three_e_5_idx_direct_bi_ort + elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then + PROVIDE normal_two_body_bi_orth + endif + endif + + return end +! --- + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC From 501b9d648702c0f5a2ba0f684ef40ce69d0cb6ce Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 09:58:29 +0200 Subject: [PATCH 153/337] minor modifs --- .../grid_becke_vector.irp.f | 21 +++++++++++++--- src/bi_ort_ints/bi_ort_ints.irp.f | 25 ++++++++++++++----- src/bi_ort_ints/semi_num_ints_mo.irp.f | 22 ++++++++-------- src/bi_ortho_mos/bi_ort_mos_in_r.irp.f | 4 --- src/tc_bi_ortho/slater_tc_opt.irp.f | 2 +- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 8982fe83..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -1,10 +1,13 @@ BEGIN_PROVIDER [integer, n_points_final_grid] - implicit none + BEGIN_DOC ! Number of points which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_final_grid = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -16,9 +19,11 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo enddo - print*,'n_points_final_grid = ',n_points_final_grid - print*,'n max point = ',n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) + END_PROVIDER ! --- @@ -41,6 +46,10 @@ END_PROVIDER implicit none integer :: i, j, k, l, i_count double precision :: r(3) + double precision :: wall0, wall1 + + call wall_time(wall0) + print *, ' Providing final_grid_points ...' i_count = 0 do j = 1, nucl_num @@ -65,6 +74,10 @@ END_PROVIDER FREE grid_points_per_atom FREE final_weight_at_r + call wall_time(wall1) + print *, ' wall time for final_grid_points,', wall1 - wall0 + call print_memory_usage() + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index bb0424cd..e64892d7 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -1,22 +1,35 @@ +! --- + program bi_ort_ints - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 10 - my_n_pt_a_grid = 14 - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + !my_n_pt_r_grid = 10 + !my_n_pt_a_grid = 14 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + ! call test_3e ! call test_5idx ! call test_5idx2 - call test_4idx + !call test_4idx + call test_4idx2() end subroutine test_5idx2 PROVIDE three_e_5_idx_cycle_2_bi_ort end +subroutine test_4idx2() + PROVIDE three_e_4_idx_direct_bi_ort +end + subroutine test_3e implicit none integer :: i,k,j,l,m,n,ipoint diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 6354b393..355fa38f 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -54,7 +54,7 @@ BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_poi enddo enddo -! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu + !FREE mo_v_ki_bi_ortho_erf_rk_cst_mu END_PROVIDER @@ -161,8 +161,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_ao_transp - print *, ' providing int2_grad1_u12_bimo_transp' - call wall_time(wall0) + !print *, ' providing int2_grad1_u12_bimo_transp' + !call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -180,9 +180,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) - print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 + !call print_memory_usage() END_PROVIDER @@ -194,8 +194,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, integer :: i, j, ipoint double precision :: wall0, wall1 - call wall_time(wall0) - print *, ' Providing int2_grad1_u12_bimo_t ...' + !call wall_time(wall0) + !print *, ' Providing int2_grad1_u12_bimo_t ...' PROVIDE mo_l_coef mo_r_coef PROVIDE int2_grad1_u12_bimo_transp @@ -212,9 +212,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, FREE int2_grad1_u12_bimo_transp - call wall_time(wall1) - print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 - call print_memory_usage() + !call wall_time(wall1) + !print *, ' wall time for int2_grad1_u12_bimo_t,', wall1 - wall0 + !call print_memory_usage() END_PROVIDER diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f index 8667683e..25572854 100644 --- a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f +++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f @@ -47,8 +47,6 @@ BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, enddo enddo - FREE mos_r_in_r_array - END_PROVIDER ! --- @@ -133,8 +131,6 @@ BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp, (n_points_final_grid,m enddo enddo - FREE mos_l_in_r_array - END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 42c59308..7acb0d0f 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -20,7 +20,7 @@ subroutine provide_all_three_ints_bi_ortho() PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif - if(.not. double_normal_ord. and. three_e_5_idx_term) then + if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then PROVIDE normal_two_body_bi_orth From b984d7a1f4a734ae459a3c91e2ca9ee2ea26bc50 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sun, 4 Jun 2023 15:27:07 +0200 Subject: [PATCH 154/337] minor modif --- src/bi_ort_ints/bi_ort_ints.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index e64892d7..5e465d0f 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -27,7 +27,8 @@ subroutine test_5idx2 end subroutine test_4idx2() - PROVIDE three_e_4_idx_direct_bi_ort + !PROVIDE three_e_4_idx_direct_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort end subroutine test_3e From b03709020df45806e59d2dfc18303285d69696d4 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 4 Jun 2023 16:45:38 +0200 Subject: [PATCH 155/337] 5idx arrays need O(N3) temp arrays --- src/bi_ort_ints/bi_ort_ints.irp.f | 34 +-- src/bi_ort_ints/three_body_ijmkl.irp.f | 330 +++++++++++++------------ 2 files changed, 187 insertions(+), 177 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 42bbe315..5618a2cd 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -8,8 +8,8 @@ program bi_ort_ints my_n_pt_a_grid = 14 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e + call test_5idx2 call test_5idx -! call test_5idx2 end subroutine test_5idx2 @@ -60,6 +60,8 @@ subroutine test_5idx k = 1 n = 0 accu = 0.d0 + PROVIDE three_e_5_idx_direct_bi_ort_old + do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -69,28 +71,28 @@ subroutine test_5idx ! stop ! endif -! new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) -! ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + new = three_e_5_idx_direct_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_direct_bi_ort_old(m,l,j,k,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. 1.d-10)then + print*,'direct' + print*,i,k,j,l,m + print*,ref,new,contrib + stop + endif +! +! new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) +! ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) ! contrib = dabs(new - ref) ! accu += contrib ! if(contrib .gt. 1.d-10)then -! print*,'direct' +! print*,'exch12' ! print*,i,k,j,l,m ! print*,ref,new,contrib ! stop ! endif ! - new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) - contrib = dabs(new - ref) - accu += contrib - if(contrib .gt. 1.d-10)then - print*,'exch12' - print*,i,k,j,l,m - print*,ref,new,contrib - stop - endif - ! ! new = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) ! ref = three_e_5_idx_cycle_1_bi_ort_old(m,l,j,k,i) @@ -135,7 +137,7 @@ subroutine test_5idx ! print*,ref,new,contrib ! stop ! endif -! + enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index 7b39235b..6e46637d 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -15,7 +15,7 @@ end ! ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs ! - ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = :: : notice that i is the RIGHT MO and k is the LEFT MO ! ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! @@ -23,210 +23,218 @@ end END_DOC implicit none - integer :: i, j, k, m, l - double precision :: wall1, wall0 - integer :: ipoint - double precision, allocatable :: grad_mli(:,:,:), orb_mat(:,:,:) - double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:,:) - double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:,:) - double precision, allocatable :: tmp_mat(:,:,:,:) - allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + integer :: i, j, k, m, l + double precision :: wall1, wall0 + integer :: ipoint + double precision, allocatable :: grad_mli(:,:), orb_mat(:,:,:) + double precision, allocatable :: lk_grad_mi(:,:,:,:), rk_grad_im(:,:,:) + double precision, allocatable :: lm_grad_ik(:,:,:,:), rm_grad_ik(:,:,:) + double precision, allocatable :: tmp_mat(:,:,:) provide mos_r_in_r_array_transp mos_l_in_r_array_transp PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + call print_memory_usage print *, ' Providing the three_e_5_idx_bi_ort ...' call wall_time(wall0) - do m = 1, mo_num + three_e_5_idx_direct_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_1_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_cycle_2_bi_ort(:,:,:,:,:) = 0.d0 + three_e_5_idx_exch23_bi_ort (:,:,:,:,:) = 0.d0 + three_e_5_idx_exch13_bi_ort (:,:,:,:,:) = 0.d0 - allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + call print_memory_usage + + allocate(tmp_mat(mo_num,mo_num,mo_num)) allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP grad_mli, orb_mat) - !$OMP DO COLLAPSE(2) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) do i=1,mo_num do l=1,mo_num - do ipoint=1, n_points_final_grid + do ipoint=1, n_points_final_grid - grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & - int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & - int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & - int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + orb_mat(ipoint,l,i) = final_weight_at_r_vector(ipoint) & + * mos_l_in_r_array_transp(ipoint,l) & + * mos_r_in_r_array_transp(ipoint,i) - orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & - orb_mat, n_points_final_grid, & - grad_mli, n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = - tmp_mat(l,j,k,i) - tmp_mat(k,i,l,j) - enddo enddo enddo enddo !$OMP END PARALLEL DO - deallocate(orb_mat,grad_mli) + tmp_mat = 0.d0 + call print_memory_usage +! + do m = 1, mo_num - allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) - allocate(rk_grad_im(n_points_final_grid,3,mo_num,mo_num)) + allocate(grad_mli(n_points_final_grid,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP rm_grad_ik, lm_grad_ik, rk_grad_im, lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid - lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) - lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + grad_mli(ipoint,l) = & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) +& + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) +& + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) - rm_grad_ik(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) - rm_grad_ik(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) - rm_grad_ik(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + enddo + enddo + !$OMP END PARALLEL DO - rk_grad_im(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) - rk_grad_im(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) - rk_grad_im(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + call dgemm('T','N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0,& + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num + !$OMP PARALLEL PRIVATE(j,k,l) + !$OMP DO + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP DO do j = 1, mo_num do l = 1, mo_num - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k,i) + do k = 1, mo_num + three_e_5_idx_direct_bi_ort(m,k,i,l,j) = three_e_5_idx_direct_bi_ort(m,k,i,l,j) - tmp_mat(l,j,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + enddo + + deallocate(grad_mli) + + allocate(lm_grad_ik(n_points_final_grid,3,mo_num,mo_num)) + allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL DO PRIVATE (i,l,ipoint) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + + lm_grad_ik(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + lm_grad_ik(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + + lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) + lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) + enddo enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lm_grad_ik, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,j,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = - tmp_mat(k,j,i,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = - tmp_mat(k,i,j,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = - tmp_mat(l,j,i,k) + allocate(rm_grad_ik(n_points_final_grid,3,mo_num)) + allocate(rk_grad_im(n_points_final_grid,3,mo_num)) + + do i=1,mo_num + !$OMP PARALLEL DO PRIVATE (l,ipoint) + do l=1,mo_num + do ipoint=1, n_points_final_grid + + rm_grad_ik(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + rm_grad_ik(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + rm_grad_ik(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + + rk_grad_im(ipoint,1,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,i,m) + rk_grad_im(ipoint,2,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,i,m) + rk_grad_im(ipoint,3,l) = mos_r_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,i,m) + enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - deallocate(lm_grad_ik) + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - allocate(lk_grad_mi(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,l,ipoint) & - !$OMP SHARED (m,mo_num,n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP lk_grad_mi) - !$OMP DO COLLAPSE(2) - do i=1,mo_num - do l=1,mo_num - do ipoint=1, n_points_final_grid - - lk_grad_mi(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,1,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,2,m,i) * final_weight_at_r_vector(ipoint) - lk_grad_mi(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,l) * int2_grad1_u12_bimo_t(ipoint,3,m,i) * final_weight_at_r_vector(ipoint) - - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rm_grad_ik, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l,i) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k,i) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(k,i,l,j) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = three_e_5_idx_direct_bi_ort(m,l,j,k,i) - tmp_mat(l,j,k) + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & - lk_grad_mi, 3*n_points_final_grid, & - rk_grad_im, 3*n_points_final_grid, 0.d0, & - tmp_mat, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lm_grad_ik, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k,l) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do l = 1, mo_num - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(l,j,i,k) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) - tmp_mat(k,i,j,l) - three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(k,j,i,l) - three_e_5_idx_exch13_bi_ort (m,l,j,k,i) = three_e_5_idx_exch13_bi_ort (m,l,j,k,i) - tmp_mat(l,i,j,k) + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,k,j) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,j,k) + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rm_grad_ik, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) - tmp_mat(k,j,l) + three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) = three_e_5_idx_cycle_2_bi_ort(m,l,i,k,j) - tmp_mat(l,j,k) + three_e_5_idx_exch23_bi_ort (m,l,j,k,i) = three_e_5_idx_exch23_bi_ort (m,l,j,k,i) - tmp_mat(l,j,k) + three_e_5_idx_exch13_bi_ort (m,l,i,k,j) = three_e_5_idx_exch13_bi_ort (m,l,i,k,j) - tmp_mat(k,j,l) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0,& + lk_grad_mi, 3*n_points_final_grid, & + rk_grad_im, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(j,k,l) + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) = three_e_5_idx_cycle_1_bi_ort(m,l,j,i,k) - tmp_mat(l,j,k) + three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) = three_e_5_idx_cycle_2_bi_ort(m,i,j,k,l) - tmp_mat(k,l,j) + three_e_5_idx_exch23_bi_ort (m,i,j,k,l) = three_e_5_idx_exch23_bi_ort (m,i,j,k,l) - tmp_mat(k,j,l) + three_e_5_idx_exch13_bi_ort (m,l,j,i,k) = three_e_5_idx_exch13_bi_ort (m,l,j,i,k) - tmp_mat(l,k,j) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + deallocate(rm_grad_ik) + deallocate(rk_grad_im) + deallocate(lk_grad_mi) + deallocate(lm_grad_ik) - deallocate(lk_grad_mi) - deallocate(rm_grad_ik) - deallocate(rk_grad_im) enddo + deallocate(orb_mat) + call wall_time(wall1) print *, ' wall time for three_e_5_idx_bi_ort', wall1 - wall0 From b48e6b269d624ecfabcbb4895c75bd397646c0d8 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 01:33:55 +0200 Subject: [PATCH 156/337] start optim normal ordering --- src/tc_bi_ortho/normal_ordered.irp.f | 438 ++++++++++++----------- src/tc_bi_ortho/normal_ordered_old.irp.f | 390 ++++++++++++++++++++ 2 files changed, 625 insertions(+), 203 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_old.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index f8e310df..a092762b 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -14,7 +14,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer :: i, h1, p1, h2, p2 integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: hthree_aaa, hthree_aab double precision :: wall0, wall1 integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) @@ -39,57 +39,65 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ if(core_tc_op) then do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - normal_two_body_bi_orth = 0.d0 + ! opposite spin double excitations : s1 /= s2 + normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) + do pp1 = 1, n_act_orb p1 = list_act(pp1) + do hh2 = 1, n_act_orb h2 = list_act(hh2) + do pp2 = 1, n_act_orb p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - - ! opposite spin double excitations : s1 /= s2 - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) ! same spin double excitations : s1 == s2 - if(h1h2 - ! same spin double excitations with same spin contributions - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then + if((h1 < h2) .and. (p1 > p2)) then + + ! with opposite spin contributions + call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 + ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif + if(Ne(2) .ge. 3) then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + + else + + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + + if(Ne(2) .ge. 3) then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + + normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -116,178 +124,6 @@ END_PROVIDER ! --- -subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, integral - - !!!! double alpha/beta - hthree = 0.d0 - - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) - enddo - - do ii = Ne(2) + 1, Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo - - return -end - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: h1, p1, h2, p2, i - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) - else - call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) - endif - - normal_two_body_bi_orth_ab = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1, n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1, n_act_orb - p2 = list_act(pp2) - call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) - - normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] - - BEGIN_DOC - ! Normal ordered two-body sector of the three-body terms for same spin double excitations - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i,ii,j,h1,p1,h2,p2 - integer :: hh1,hh2,pp1,pp2 - integer :: Ne(2) - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision :: hthree_aab, hthree_aaa - - PROVIDE N_int - - allocate( key_i_core(N_int,2) ) - allocate( occ(N_int*bit_kind_size,2) ) - - if(core_tc_op)then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - normal_two_body_bi_orth_aa_bb = 0.d0 - do hh1 = 1, n_act_orb - h1 = list_act(hh1) - do pp1 = 1 , n_act_orb - p1 = list_act(pp1) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) - do pp2 = 1 , n_act_orb - p2 = list_act(pp2) - if(h1h2 - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - else - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2).ge.3)then - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - endif - normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa - enddo - enddo - enddo - enddo - - deallocate( key_i_core ) - deallocate( occ ) - -END_PROVIDER - -! --- - subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) BEGIN_DOC @@ -388,3 +224,199 @@ end ! --- +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & + , 0.d0, tmp_3d, mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & + , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + + enddo ! p1 + enddo ! h1 + enddo ! i + + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + + ! TODO + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + endif + + ! --- + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + + !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f new file mode 100644 index 00000000..553cafdb --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -0,0 +1,390 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, h1, p1, h2, p2 + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: hthree_aba, hthree_aaa, hthree_aab + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth_old ...' + call wall_time(wall0) + + PROVIDE N_int + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + read(11) normal_two_body_bi_orth_old + close(11) + + else + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_old = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth_old) + !$OMP DO SCHEDULE (static) + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) + + ! same spin double excitations : s1 == s2 + if(h1h2 + ! same spin double excitations with same spin contributions + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + ! with opposite spin contributions + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate( occ ) + deallocate( key_i_core ) + endif + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_old + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wall1) + print*,' Wall time for normal_two_body_bi_orth_old ', wall1-wall0 + +END_PROVIDER + +! --- + +subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, integral + + !!!! double alpha/beta + hthree = 0.d0 + + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 2.d0 * int_direct - 1.d0 * (int_exc_13 + int_exc_12) + enddo + + do ii = Ne(2) + 1, Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + enddo + + return +end + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: h1, p1, h2, p2, i + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,N_int) + else + call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int) + endif + + normal_two_body_bi_orth_ab = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1, n_act_orb + p2 = list_act(pp2) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree) + + normal_two_body_bi_orth_ab(p2,h2,p1,h1) = hthree + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (n_act_orb, n_act_orb, n_act_orb, n_act_orb)] + + BEGIN_DOC + ! Normal ordered two-body sector of the three-body terms for same spin double excitations + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i,ii,j,h1,p1,h2,p2 + integer :: hh1,hh2,pp1,pp2 + integer :: Ne(2) + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision :: hthree_aab, hthree_aaa + + PROVIDE N_int + + allocate( key_i_core(N_int,2) ) + allocate( occ(N_int*bit_kind_size,2) ) + + if(core_tc_op)then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + normal_two_body_bi_orth_aa_bb = 0.d0 + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + do pp1 = 1 , n_act_orb + p1 = list_act(pp1) + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do pp2 = 1 , n_act_orb + p2 = list_act(pp2) + if(h1h2 + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 + else + hthree_aaa = 0.d0 + endif + else + call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) + if(Ne(2).ge.3)then + call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) + else + hthree_aaa = 0.d0 + endif + endif + normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa + enddo + enddo + enddo + enddo + + deallocate( key_i_core ) + deallocate( occ ) + +END_PROVIDER + +! --- + +subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + BEGIN_DOC + ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii,i + double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 + double precision :: integral,int_exc_l,int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12= -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13= -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) + enddo + + do ii = Ne(2)+1,Ne(1) ! purely open-shell part + i = occ(ii,1) + + call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) + int_exc_l = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) + int_exc_ll = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) + int_exc_12 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) + int_exc_13 = -1.d0 * integral + + call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) + int_exc_23 = -1.d0 * integral + + hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + enddo + + return +end + +! --- + +subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer, intent(in) :: Nint, h1, h2, p1, p2 + integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) + double precision, intent(out) :: hthree + integer :: ii, i + double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 + double precision :: integral, int_exc_l, int_exc_ll + + hthree = 0.d0 + do ii = 1, Ne(2) ! purely closed shell part + i = occ(ii,2) + + call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) + int_direct = -1.d0 * integral + + call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) + int_exc_23= -1.d0 * integral + + hthree += 1.d0 * int_direct - int_exc_23 + enddo + + return +end + +! --- + From 471283634919dd134e294aa71c0bac0a37d4872c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 11:17:08 +0200 Subject: [PATCH 157/337] normal ordering: aba-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 495 ++++++++++++++++++------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 48 ++- 2 files changed, 411 insertions(+), 132 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index a092762b..59e78b92 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -22,8 +22,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing normal_two_body_bi_orth ...' call wall_time(wall0) - PROVIDE N_int - if(read_tc_norm_ord) then open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") @@ -48,12 +46,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif ! opposite spin double excitations : s1 /= s2 - normal_two_body_bi_orth(:,:,:,:) = no_aba_contraction(:,:,:,:) + PROVIDE no_aba_contraction - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & + !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & + !$OMP no_aba_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -97,7 +96,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) enddo enddo enddo @@ -124,103 +123,103 @@ END_PROVIDER ! --- -subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - BEGIN_DOC - ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii,i - double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 - double precision :: integral,int_exc_l,int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12= -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13= -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) - enddo - - do ii = Ne(2)+1,Ne(1) ! purely open-shell part - i = occ(ii,1) - - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) - int_exc_l = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) - int_exc_ll = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral - - call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) - int_exc_23 = -1.d0 * integral - - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) - enddo - - return -end +!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! BEGIN_DOC +! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 +! END_DOC +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii,i +! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 +! double precision :: integral,int_exc_l,int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13= -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) +! enddo +! +! do ii = Ne(2)+1,Ne(1) ! purely open-shell part +! i = occ(ii,1) +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) +! int_exc_l = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) +! int_exc_ll = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) +! int_exc_23 = -1.d0 * integral +! +! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) +! enddo +! +! return +!end ! --- -subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer, intent(in) :: Nint, h1, h2, p1, p2 - integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) - double precision, intent(out) :: hthree - integer :: ii, i - double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 - double precision :: integral, int_exc_l, int_exc_ll - - hthree = 0.d0 - do ii = 1, Ne(2) ! purely closed shell part - i = occ(ii,2) - - call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) - int_direct = -1.d0 * integral - - call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) - int_exc_23= -1.d0 * integral - - hthree += 1.d0 * int_direct - int_exc_23 - enddo - - return -end +!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) +! +! use bitmasks ! you need to include the bitmasks_module.f90 features +! +! implicit none +! integer, intent(in) :: Nint, h1, h2, p1, p2 +! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) +! double precision, intent(out) :: hthree +! integer :: ii, i +! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 +! double precision :: integral, int_exc_l, int_exc_ll +! +! hthree = 0.d0 +! do ii = 1, Ne(2) ! purely closed shell part +! i = occ(ii,2) +! +! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) +! int_exc_23= -1.d0 * integral +! +! hthree += 1.d0 * int_direct - int_exc_23 +! enddo +! +! return +!end ! --- @@ -264,6 +263,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) + double precision, allocatable :: tmp_2d(:,:) + allocate(tmp_2d(mo_num,mo_num)) + + ! purely closed shell part do ii = 1, Ne(2) i = occ(ii,2) @@ -313,9 +316,10 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & - , 0.d0, tmp_3d, mo_num) + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num @@ -364,38 +368,163 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp, n_points_final_grid, tmp2, n_points_final_grid & - , 1.d0, no_aba_contraction(p2,h2,1,1), mo_num*mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 enddo ! i - double precision :: integral, int_direct, int_exc_13, int_exc_12 - ! TODO + + + + + + ! purely open-shell part if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) - call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) - int_direct = -1.d0 * integral + do h1 = 1, mo_num - call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) - int_exc_13 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL - call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) - int_exc_12 = -1.d0 * integral + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i endif - ! --- + + + + + + + + + + + + + + + deallocate(tmp_3d) deallocate(tmp1, tmp2) @@ -403,17 +532,121 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpvec_1, tmpvec_2) - !$OMP PARALLEL DO PRIVATE(h1,h2,p1,p2) - do h1 = 1, mo_num - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (no_aba_contraction(p2,h2,p1,h1) + no_aba_contraction(p1,h1,p2,h2)) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO + + + + + + + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) +! enddo +! enddo +! enddo +! enddo + + + ! --- + + double precision :: integral, int_direct, int_exc_13, int_exc_12 + +! no_aba_contraction = 0.d0 +! +! ! purely closed shell part +! do ii = 1, Ne(2) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo + +! ! purely open-shell part +! if(Ne(2) < Ne(1)) then +! +! do ii = Ne(2) + 1, Ne(1) +! i = occ(ii,1) +! +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & +! !$OMP SHARED (mo_num, i, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! +! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) +! int_direct = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) +! int_exc_13 = -1.d0 * integral +! +! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) +! int_exc_12 = -1.d0 * integral +! +! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! enddo +! endif + + ! --- + +! !$OMP PARALLEL & +! !$OMP DEFAULT (NONE) & +! !$OMP PRIVATE (h1, h2, p1, p2, integral) & +! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) +! !$OMP DO SCHEDULE (static) +! do h1 = 1, mo_num +! do p1 = 1, mo_num +! do h2 = 1, mo_num +! do p2 = 1, mo_num +! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) +! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL + + END_PROVIDER diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index df86ea65..33b5c5aa 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,12 +11,14 @@ program tc_bi_ortho touch read_wf touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call test_h_u0 +! call test_h_u0 ! call test_slater_tc_opt ! call timing_tot ! call timing_diag ! call timing_single ! call timing_double + + call test_no() end subroutine test_h_u0 @@ -252,3 +254,47 @@ subroutine timing_double end +! --- + +subroutine test_no() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing normal_two_body_bi_orth ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_old + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 + + return +end + +! --- + + From 3a5dd05d7eb61dc21b1ec16eba330e3687b54001 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 15:13:18 +0200 Subject: [PATCH 158/337] NO: working on AAB contractions --- src/tc_bi_ortho/normal_ordered.irp.f | 428 +++++++++++---------------- src/utils/util.irp.f | 37 ++- 2 files changed, 217 insertions(+), 248 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 59e78b92..b3c413d3 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -45,14 +45,14 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - ! opposite spin double excitations : s1 /= s2 PROVIDE no_aba_contraction + PROVIDE no_aab_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction) + !$OMP no_aba_contraction,no_aab_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -72,9 +72,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! same spin double excitations : s1 == s2 if((h1 < h2) .and. (p1 > p2)) then - ! with opposite spin contributions - call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2 - ! same spin double excitations with same spin contributions if(Ne(2) .ge. 3) then call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 @@ -84,9 +81,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else - ! with opposite spin contributions - call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) - if(Ne(2) .ge. 3) then ! same spin double excitations with same spin contributions call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) @@ -96,7 +90,9 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + 0.5d0*(hthree_aab + hthree_aaa) + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & + + no_aab_contraction(p2,h2,p1,h1) & + + 0.5d0 * hthree_aaa enddo enddo enddo @@ -123,106 +119,6 @@ END_PROVIDER ! --- -!subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! BEGIN_DOC -! ! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 -! END_DOC -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii,i -! double precision :: int_direct,int_exc_12,int_exc_13,int_exc_23 -! double precision :: integral,int_exc_l,int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i, i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13= -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct + int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23) -! enddo -! -! do ii = Ne(2)+1,Ne(1) ! purely open-shell part -! i = occ(ii,1) -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, p1, i , i, h2, h1, integral) -! int_exc_l = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, i, p2, i, h2, h1, integral) -! int_exc_ll = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) -! int_exc_23 = -1.d0 * integral -! -! hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) -! enddo -! -! return -!end - -! --- - -!subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) -! -! use bitmasks ! you need to include the bitmasks_module.f90 features -! -! implicit none -! integer, intent(in) :: Nint, h1, h2, p1, p2 -! integer, intent(in) :: Ne(2), occ(Nint*bit_kind_size,2) -! double precision, intent(out) :: hthree -! integer :: ii, i -! double precision :: int_direct, int_exc_12, int_exc_13, int_exc_23 -! double precision :: integral, int_exc_l, int_exc_ll -! -! hthree = 0.d0 -! do ii = 1, Ne(2) ! purely closed shell part -! i = occ(ii,2) -! -! call give_integrals_3_body_bi_ort(p2, p1, i, h2, h1, i, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, h2, h1, i, integral) -! int_exc_23= -1.d0 * integral -! -! hthree += 1.d0 * int_direct - int_exc_23 -! enddo -! -! return -!end - -! --- - BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -236,6 +132,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp_3d(:,:,:) double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) print*,' Providing no_aba_contraction ...' call wall_time(wall0) @@ -262,8 +159,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) - - double precision, allocatable :: tmp_2d(:,:) allocate(tmp_2d(mo_num,mo_num)) @@ -386,13 +281,6 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! i - - - - - - - ! purely open-shell part if(Ne(2) < Ne(1)) then do ii = Ne(2) + 1, Ne(1) @@ -510,146 +398,192 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - - - - - - - - - - - - - - - - deallocate(tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) - - - - - - - - no_aba_contraction = -0.5d0 * no_aba_contraction call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! no_aba_contraction(p2,h2,p1,h1) = -0.5d0 * (tmp_4d(p2,h2,p1,h1) + tmp_4d(p1,h1,p2,h2)) -! enddo -! enddo -! enddo -! enddo - - - ! --- - - double precision :: integral, int_direct, int_exc_13, int_exc_12 - -! no_aba_contraction = 0.d0 -! -! ! purely closed shell part -! do ii = 1, Ne(2) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! !no_aba_contraction(p2,h2,p1,h1) += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo - -! ! purely open-shell part -! if(Ne(2) < Ne(1)) then -! -! do ii = Ne(2) + 1, Ne(1) -! i = occ(ii,1) -! -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, int_direct, int_exc_13, int_exc_12, integral) & -! !$OMP SHARED (mo_num, i, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! -! call give_integrals_3_body_bi_ort(i, p2, p1, i, h2, h1, integral) -! int_direct = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p1, p2, i, i, h2, h1, integral) -! int_exc_13 = -1.d0 * integral -! -! call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) -! int_exc_12 = -1.d0 * integral -! -! no_aba_contraction(p2,h2,p1,h1) += 0.5d0 * int_direct - 0.25d0 * (int_exc_13 + int_exc_12) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL -! enddo -! endif - - ! --- - -! !$OMP PARALLEL & -! !$OMP DEFAULT (NONE) & -! !$OMP PRIVATE (h1, h2, p1, p2, integral) & -! !$OMP SHARED (mo_num, N_int,Ne, occ, no_aba_contraction) -! !$OMP DO SCHEDULE (static) -! do h1 = 1, mo_num -! do p1 = 1, mo_num -! do h2 = 1, mo_num -! do p2 = 1, mo_num -! call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, integral) -! no_aba_contraction(p2,h2,p1,h1) = 0.5d0 * integral -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! !$OMP END PARALLEL - - + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = 0.5d0 * no_aab_contraction + call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + + +END_PROVIDER + +! --- diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index aba99c2b..a9f1a438 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -490,7 +490,7 @@ end subroutine check_sym subroutine sum_A_At(A, N) !BEGIN_DOC - ! useful for symmetrizing a tensor without a temporary tensor + ! add a tensor with its transpose without a temporary tensor !END_DOC implicit none @@ -521,3 +521,38 @@ subroutine sum_A_At(A, N) end +! --- + +subroutine sub_A_At(A, N) + + !BEGIN_DOC + ! substruct a tensor with its transpose without a temporary tensor + !END_DOC + + implicit none + integer, intent(in) :: N + double precision, intent(inout) :: A(N,N) + integer :: i, j + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j) & + !$OMP SHARED (A, N) + !$OMP DO + do j = 1, N + do i = j, N + A(i,j) -= A(j,i) + enddo + enddo + !$OMP END DO + + !$OMP DO + do j = 2, N + do i = 1, j-1 + A(i,j) = -A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end From b0da0ac04d49b3fbbbe0eb9649b0b6da87cce6d2 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 16:08:46 +0200 Subject: [PATCH 159/337] normal ordering: aab-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 73 +++++++++++++++++++--------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index b3c413d3..3a1e79fd 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -230,11 +230,11 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -398,7 +398,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ enddo !i endif - deallocate(tmp_3d) + deallocate(tmp_2d, tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -446,12 +446,12 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif + allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) ! purely closed shell part @@ -471,10 +471,10 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) enddo !$OMP END DO !$OMP END PARALLEL @@ -515,17 +515,17 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL @@ -567,9 +567,38 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1) deallocate(tmpvec_1) - no_aab_contraction = 0.5d0 * no_aab_contraction - call sub_A_At(no_aab_contraction(1,1,1,1), mo_num*mo_num) + no_aab_contraction = -0.5d0 * no_aab_contraction + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -579,11 +608,11 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ enddo enddo enddo + !$OMP END PARALLEL call wall_time(wall1) print*,' Wall time for no_aab_contraction', wall1-wall0 - END_PROVIDER ! --- From aafca191f1fe271575f80f5d16eb80587290213c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 5 Jun 2023 20:59:34 +0200 Subject: [PATCH 160/337] normal ordering: aaa-DGEMM OK --- src/tc_bi_ortho/normal_ordered.irp.f | 593 +++++++++++++++++++++-- src/tc_bi_ortho/normal_ordered_old.irp.f | 4 +- 2 files changed, 568 insertions(+), 29 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 3a1e79fd..fea229c9 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -47,12 +47,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ PROVIDE no_aba_contraction PROVIDE no_aab_contraction + PROVIDE no_aaa_contraction !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction,no_aab_contraction) + !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) !$OMP DO SCHEDULE (static) do hh1 = 1, n_act_orb h1 = list_act(hh1) @@ -66,33 +67,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do pp2 = 1, n_act_orb p2 = list_act(pp2) - ! all contributions from the 3-e terms to the double excitations - ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant - - ! same spin double excitations : s1 == s2 - if((h1 < h2) .and. (p1 > p2)) then - - ! same spin double excitations with same spin contributions - if(Ne(2) .ge. 3) then - call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2 - else - hthree_aaa = 0.d0 - endif - - else - - if(Ne(2) .ge. 3) then - ! same spin double excitations with same spin contributions - call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) - else - hthree_aaa = 0.d0 - endif - - endif - - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) & - + no_aab_contraction(p2,h2,p1,h1) & - + 0.5d0 * hthree_aaa + normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) enddo enddo enddo @@ -616,3 +591,565 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 553cafdb..417580dd 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -89,6 +89,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, hthree_aaa = 0.d0 endif endif + normal_two_body_bi_orth_old(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa) enddo enddo @@ -350,7 +351,8 @@ subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(i, p1, p2, i, h2, h1, integral) int_exc_23 = -1.d0 * integral - hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + !hthree += 1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) + hthree += 0.5d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll - (int_exc_12 + int_exc_13 + int_exc_23)) enddo return From 4b9b2a25603cda0d7687938e13384cb53877b9a9 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 8 Jun 2023 10:49:06 +0200 Subject: [PATCH 161/337] update ROHF F matrix with gamess parametrization --- src/scf_utils/fock_matrix.irp.f | 194 +++++++++++++++++++++++--------- 1 file changed, 138 insertions(+), 56 deletions(-) diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f index 61633d3b..1942e542 100644 --- a/src/scf_utils/fock_matrix.irp.f +++ b/src/scf_utils/fock_matrix.irp.f @@ -5,6 +5,90 @@ ! Fock matrix on the MO basis. ! For open shells, the ROHF Fock Matrix is :: ! + ! | Rcc | F^b | Fcv | + ! |-----------------------| + ! | F^b | Roo | F^a | + ! |-----------------------| + ! | Fcv | F^a | Rvv | + ! + ! C: Core, O: Open, V: Virtual + ! + ! Rcc = Acc Fcc^a + Bcc Fcc^b + ! Roo = Aoo Foo^a + Boo Foo^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b + ! Fcv = (F^a + F^b)/2 + ! + ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) + ! A,B: Coupling parameters + ! + ! J. Chem. Phys. 133, 141102 (2010), https://doi.org/10.1063/1.3503173 + ! Coupling parameters from J. Chem. Phys. 125, 204110 (2006); https://doi.org/10.1063/1.2393223. + ! cc oo vv + ! A -0.5 0.5 1.5 + ! B 1.5 0.5 -0.5 + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + Fock_matrix_mo = Fock_matrix_mo_alpha + else + ! Core + do j = 1, elec_beta_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = - 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 1.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + ! Open + do j = elec_beta_num+1, elec_alpha_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + enddo + ! Virtual + do j = elec_alpha_num+1, mo_num + ! Core + do i = 1, elec_beta_num + fock_matrix_mo(i,j) = 0.5d0 * fock_matrix_mo_alpha(i,j) & + + 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + fock_matrix_mo(i,j) = fock_matrix_mo_alpha(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + fock_matrix_mo(i,j) = 1.5d0 * fock_matrix_mo_alpha(i,j) & + - 0.5d0 * fock_matrix_mo_beta(i,j) + enddo + enddo + endif + + ! Old + ! BEGIN_DOC + ! Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is :: + ! ! | F-K | F + K/2 | F | ! |---------------------------------| ! | F + K/2 | F | F - K/2 | @@ -16,64 +100,64 @@ ! ! K = Fb - Fa ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_mo = Fock_matrix_mo_alpha - else + ! END_DOC + !integer :: i,j,n + !if (elec_alpha_num == elec_beta_num) then + ! Fock_matrix_mo = Fock_matrix_mo_alpha + !else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - enddo + ! do j=1,elec_beta_num + ! ! F-K + ! do i=1,elec_beta_num !CC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K/2 + ! do i=elec_beta_num+1,elec_alpha_num !CA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_alpha_num+1, mo_num !CV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_beta_num+1,elec_alpha_num + ! ! F+K/2 + ! do i=1,elec_beta_num !AC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F + ! do i=elec_beta_num+1,elec_alpha_num !AA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_alpha_num+1, mo_num !AV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& - - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & - + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) - enddo - enddo + ! do j=elec_alpha_num+1, mo_num + ! ! F + ! do i=1,elec_beta_num !VC + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) + ! enddo + ! ! F-K/2 + ! do i=elec_beta_num+1,elec_alpha_num !VA + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))& + ! - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! ! F+K + ! do i=elec_alpha_num+1,mo_num !VV + ! Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) & + ! + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j)) + ! enddo + ! enddo - endif + !endif do i = 1, mo_num Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) @@ -115,8 +199,6 @@ END_PROVIDER - - BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_num,mo_num) ] implicit none BEGIN_DOC From 374a88bc624396370660182f6da3d876934b35b9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:51:52 +0200 Subject: [PATCH 162/337] normal ordering with DGEMM: OK --- src/tc_bi_ortho/normal_ordered.irp.f | 1230 ++++++++--------- .../normal_ordered_contractions.irp.f | 1062 ++++++++++++++ 2 files changed, 1615 insertions(+), 677 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_contractions.irp.f diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index fea229c9..7259c270 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -11,16 +11,15 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, h1, p1, h2, p2 + integer :: i, ii, h1, p1, h2, p2, ipoint integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) - double precision :: hthree_aaa, hthree_aab - double precision :: wall0, wall1 + double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) print*,' Providing normal_two_body_bi_orth ...' - call wall_time(wall0) + call wall_time(walli) if(read_tc_norm_ord) then @@ -30,6 +29,11 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -45,224 +49,33 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) endif - PROVIDE no_aba_contraction - PROVIDE no_aab_contraction - PROVIDE no_aaa_contraction + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aab, hthree_aaa) & - !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth, & - !$OMP no_aba_contraction, no_aab_contraction, no_aaa_contraction) - !$OMP DO SCHEDULE (static) - do hh1 = 1, n_act_orb - h1 = list_act(hh1) + ! --- + ! aba contraction - do pp1 = 1, n_act_orb - p1 = list_act(pp1) + print*,' Providing aba_contraction ...' + call wall_time(wall0) - do hh2 = 1, n_act_orb - h2 = list_act(hh2) + tmp = 0.d0 - do pp2 = 1, n_act_orb - p2 = list_act(pp2) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) - normal_two_body_bi_orth(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + no_aab_contraction(p2,h2,p1,h1) + no_aaa_contraction(p2,h2,p1,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate( occ ) - deallocate( key_i_core ) - endif - - if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") - call ezfio_set_work_empty(.False.) - write(11) normal_two_body_bi_orth - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(wall1) - print*,' Wall time for normal_two_body_bi_orth ', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aba_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - do h1 = 1, mo_num + ! to minimize the number of operations !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint) & @@ -304,29 +117,30 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo !$OMP END PARALLEL DO + ! to avoid tmp(N^4) do p1 = 1, mo_num ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid @@ -355,313 +169,171 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2) + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - no_aba_contraction = -0.5d0 * no_aba_contraction - call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + do h1 = 1, mo_num - call wall_time(wall1) - print*,' Wall time for no_aba_contraction', wall1-wall0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) - double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) - double precision, allocatable :: tmp_2d(:,:) - - print*,' Providing no_aab_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpvec_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END DO + !$OMP END PARALLEL - enddo ! p1 - enddo ! h1 - enddo ! i + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - deallocate(tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1) - deallocate(tmpvec_1) + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO - no_aab_contraction = -0.5d0 * no_aab_contraction + do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aab_contraction, mo_num) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END PARALLEL + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO - call wall_time(wall1) - print*,' Wall time for no_aab_contraction', wall1-wall0 + enddo ! p1 + enddo ! h1 + enddo !i + endif -END_PROVIDER + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmp_2d) -! --- + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 - BEGIN_DOC - ! - ! if: - ! h1 < h2 - ! p1 > p2 - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] - ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! else: - ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] - ! - ! - ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) - ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > - ! + < p2 p1 i | i h2 h1 > - ! + < p1 i p2 | i h2 h1 > ] - ! - ! - END_DOC + normal_two_body_bi_orth = tmp - use bitmasks ! you need to include the bitmasks_module.f90 features + ! --- + ! aab contraction - implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: Ne(2) - double precision :: wall0, wall1 - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + print*,' Providing aab_contraction ...' + call wall_time(wall0) - print*,' Providing no_aaa_contraction ...' - call wall_time(wall0) - - PROVIDE N_int - - allocate(occ(N_int*bit_kind_size,2)) - allocate(key_i_core(N_int,2)) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - if(Ne(2) .lt. 3) then - - no_aaa_contraction = 0.d0 - - else + tmp = 0.d0 allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmpvec_3(n_points_final_grid,3)) ! purely closed shell part do ii = 1, Ne(2) @@ -677,21 +349,13 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP SHARED (n_points_final_grid, i, h1, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP tmpval_1, tmpvec_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo !$OMP END DO !$OMP END PARALLEL @@ -722,39 +386,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -763,58 +395,32 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! to avoid tmp(N^4) do p1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) !$OMP DO do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo !$OMP END DO !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) !$OMP DO do h2 = 1, mo_num do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo !$OMP END DO @@ -828,47 +434,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -877,14 +443,85 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ enddo ! h1 enddo ! i + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + tmp = -0.5d0 * tmp - ! purely open-shell part - if(Ne(2) < Ne(1)) then + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) ! to avoid tmp(N^4) do h1 = 1, mo_num @@ -932,16 +569,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -964,16 +601,16 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -1039,15 +676,15 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1074,82 +711,321 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & , 1.d0, tmp_2d(1,1), mo_num) !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 - enddo !i - endif + enddo ! i - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2, tmp3) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + ! purely open-shell part + if(Ne(2) < Ne(1)) then - no_aaa_contraction = -0.5d0 * no_aaa_contraction + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + ! to avoid tmp(N^4) + do h1 = 1, mo_num - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo enddo enddo enddo - enddo - !$OMP END DO + !$OMP END DO - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo enddo enddo enddo - enddo - !$OMP END PARALLEL + !$OMP END PARALLEL + call wall_time(wallf) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth ', wallf-walli -END_PROVIDER +END_PROVIDER ! --- + diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f new file mode 100644 index 00000000..855cfd17 --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -0,0 +1,1062 @@ + +! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmp_2d(mo_num,mo_num)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpvec_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! + ! if: + ! h1 < h2 + ! p1 > p2 + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! else: + ! + ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! + ! + ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) + ! J(p2,h2,p1,h1) = \sum_i [ < i p2 p1 | i h2 h1 > + ! + < p2 p1 i | i h2 h1 > + ! + < p1 i p2 | i h2 h1 > ] + ! + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + ! purely closed shell part + do ii = 1, Ne(2) + i = occ(ii,2) + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo ! i + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + + ! to avoid tmp(N^4) + do h1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpvec_1, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_2, tmpvec_2, tmp1) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + ! to avoid tmp(N^4) + do p1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) + !$OMP DO + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) + !$OMP DO + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2, tmp3) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2, tmpvec_3) + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- From ee06ddf85e2b3fc83faa25515e80b262a2932aa7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 8 Jun 2023 15:59:14 +0200 Subject: [PATCH 163/337] free two (3xN_gridxMOxMO) tables in TC-CIPSI --- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index f9bda058..3e6f229b 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -63,7 +63,9 @@ subroutine run_cipsi_tc call provide_all_three_ints_bi_ortho() endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then @@ -78,13 +80,16 @@ subroutine run_cipsi_tc call json_close else + PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks + if(elec_alpha_num+elec_beta_num.ge.3)then if(three_body_h_tc)then call provide_all_three_ints_bi_ortho endif endif - ! --- + + FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi From 22e1dcd1c4a5cdce7159d926443968b83dfb271c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 9 Jun 2023 21:32:13 +0200 Subject: [PATCH 164/337] 4-idx tensors: DGEMM with tmp(N3) added --- src/bi_ort_ints/bi_ort_ints.irp.f | 192 +++++--- src/bi_ort_ints/three_body_ijmk.irp.f | 564 +++++++---------------- src/bi_ort_ints/three_body_ijmk_n4.irp.f | 484 +++++++++++++++++++ 3 files changed, 771 insertions(+), 469 deletions(-) create mode 100644 src/bi_ort_ints/three_body_ijmk_n4.irp.f diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 75af8fb1..7f90c6f3 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -18,10 +18,11 @@ program bi_ort_ints ! call test_3e ! call test_5idx ! call test_5idx2 - !call test_4idx - call test_4idx2() - call test_5idx2 - call test_5idx + call test_4idx() + call test_4idx_n4() + !call test_4idx2() + !call test_5idx2 + !call test_5idx end subroutine test_5idx2 @@ -167,13 +168,138 @@ end ! --- +subroutine test_4idx_n4() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + thr = 1d-10 + + PROVIDE three_e_4_idx_direct_bi_ort_old + PROVIDE three_e_4_idx_direct_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_direct_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_direct_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_direct_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch13_bi_ort_old + PROVIDE three_e_4_idx_exch13_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch13_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch13_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch13_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_cycle_1_bi_ort_old + PROVIDE three_e_4_idx_cycle_1_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_cycle_1_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_cycle_1_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_cycle_1_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + PROVIDE three_e_4_idx_exch23_bi_ort_old + PROVIDE three_e_4_idx_exch23_bi_ort_n4 + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = three_e_4_idx_exch23_bi_ort_n4 (l,k,j,i) + ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem in three_e_4_idx_exch23_bi_ort_n4' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on three_e_4_idx_exch23_bi_ort_n4 = ', accu / dble(mo_num)**4 + + ! --- + + return +end + +! --- + subroutine test_4idx() implicit none integer :: i, j, k, l double precision :: accu, contrib, new, ref, thr - thr = 1d-5 + thr = 1d-10 PROVIDE three_e_4_idx_direct_bi_ort_old PROVIDE three_e_4_idx_direct_bi_ort @@ -231,34 +357,6 @@ subroutine test_4idx() ! --- -! PROVIDE three_e_4_idx_exch12_bi_ort_old -! PROVIDE three_e_4_idx_exch12_bi_ort -! -! accu = 0.d0 -! do i = 1, mo_num -! do j = 1, mo_num -! do k = 1, mo_num -! do l = 1, mo_num -! -! new = three_e_4_idx_exch12_bi_ort (l,k,j,i) -! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. thr) then -! print*, ' problem in three_e_4_idx_exch12_bi_ort' -! print*, l, k, j, i -! print*, ref, new, contrib -! stop -! endif -! -! enddo -! enddo -! enddo -! enddo -! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4 - - ! --- - PROVIDE three_e_4_idx_cycle_1_bi_ort_old PROVIDE three_e_4_idx_cycle_1_bi_ort @@ -287,34 +385,6 @@ subroutine test_4idx() ! --- -! PROVIDE three_e_4_idx_cycle_2_bi_ort_old -! PROVIDE three_e_4_idx_cycle_2_bi_ort -! -! accu = 0.d0 -! do i = 1, mo_num -! do j = 1, mo_num -! do k = 1, mo_num -! do l = 1, mo_num -! -! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i) -! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i) -! contrib = dabs(new - ref) -! accu += contrib -! if(contrib .gt. thr) then -! print*, ' problem in three_e_4_idx_cycle_2_bi_ort' -! print*, l, k, j, i -! print*, ref, new, contrib -! stop -! endif -! -! enddo -! enddo -! enddo -! enddo -! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4 - - ! --- - PROVIDE three_e_4_idx_exch23_bi_ort_old PROVIDE three_e_4_idx_exch23_bi_ort diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index ee7e88ef..0d466f9f 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -3,9 +3,8 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -13,28 +12,25 @@ ! ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_exch13_bi_ort (j,m,k,i) + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) ! ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC implicit none - integer :: ipoint, i, j, k, l, m + integer :: ipoint, i, j, k, m, n double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) - double precision, allocatable :: tmp_4d(:,:,:,:) - double precision, allocatable :: tmp4(:,:,:) - double precision, allocatable :: tmp5(:,:) - double precision, allocatable :: tmp_3d(:,:,:) + double precision :: tmp_loc_1, tmp_loc_2 + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:) + double precision, allocatable :: tmp_2d(:,:) + double precision, allocatable :: tmp_aux_1(:,:,:), tmp_aux_2(:,:) print *, ' Providing the three_e_4_idx_bi_ort ...' call wall_time(wall0) @@ -42,324 +38,188 @@ provide mos_r_in_r_array_transp mos_l_in_r_array_transp - allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) - - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + ! to reduce the number of operations + allocate(tmp_aux_1(n_points_final_grid,4,mo_num)) + allocate(tmp_aux_2(n_points_final_grid,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & + !$OMP PRIVATE (n, ipoint) & !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2, tmp3) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) - - tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) - tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) - - tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) - tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp2) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp3) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(tmp_4d) - - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp5(n_points_final_grid,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, & - !$OMP tmp5) + !$OMP tmp_aux_1, tmp_aux_2) !$OMP DO - do i = 1, mo_num + do n = 1, mo_num do ipoint = 1, n_points_final_grid - tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmp_aux_1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * final_weight_at_r_vector(ipoint) + tmp_aux_1(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) * final_weight_at_r_vector(ipoint) + + tmp_aux_2(ipoint,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO !$OMP END PARALLEL + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,4,mo_num)) + allocate(tmp2(n_points_final_grid,4,mo_num)) - allocate(tmp4(n_points_final_grid,mo_num,mo_num)) - - do m = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, m, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) + ! loops approach to break the O(N^4) scaling in memory + do k = 1, mo_num do i = 1, mo_num - do k = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & - + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & - + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) - enddo + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_2, tmp1) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) + tmp_loc_2 = tmp_aux_2(ipoint,n) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & - , tmp5, n_points_final_grid, tmp4, n_points_final_grid & - , 0.d0, tmp_3d, mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(i,j,k) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, k, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, m, & - !$OMP mos_l_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) - do k = 1, mo_num - do j = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & - * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & - + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & - + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & - , 0.d0, tmp_3d, mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(i,j,k) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - enddo - - deallocate(tmp5) - deallocate(tmp_3d) - - - - do i = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (m, j, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp4) - !$OMP DO COLLAPSE(2) + !$OMP PARALLEL DO PRIVATE(j,m) do j = 1, mo_num do m = 1, mo_num - do ipoint = 1, n_points_final_grid + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO - tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & - * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) + + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) + tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) + tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & - , 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - enddo + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO - deallocate(tmp4) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + !$OMP PARALLEL DO PRIVATE(j,m) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + enddo + enddo + !$OMP END PARALLEL DO -! !$OMP PARALLEL DO PRIVATE(i,j,k,m) -! do i = 1, mo_num -! do k = 1, mo_num -! do j = 1, mo_num -! do m = 1, mo_num -! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i) -! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i) -! enddo -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO + enddo ! i + + do j = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & + !$OMP SHARED (mo_num, n_points_final_grid, j, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2) + !$OMP DO + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,j) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,j,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,j,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,j,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * int2_grad1_u12_bimo_t(ipoint,1,j,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,j) * int2_grad1_u12_bimo_t(ipoint,2,j,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,j) * int2_grad1_u12_bimo_t(ipoint,3,j,n) + + tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n) + tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n) + tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n) + tmp2(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,m) + do i = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! j + enddo !k + + deallocate(tmp_2d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp_aux_1) + deallocate(tmp_aux_2) call wall_time(wall1) @@ -370,115 +230,3 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! - ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs - ! - ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! - ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign - ! - ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki - ! - END_DOC - - implicit none - integer :: i, j, k, l, m, ipoint - double precision :: wall1, wall0 - double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) - double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) - - print *, ' Providing the three_e_4_idx_exch23_bi_ort ...' - call wall_time(wall0) - - provide mos_r_in_r_array_transp mos_l_in_r_array_transp - - - allocate(tmp5(n_points_final_grid,mo_num,mo_num)) - allocate(tmp6(n_points_final_grid,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp5, tmp6) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & - + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & - + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) - - tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & - , tmp5, n_points_final_grid, tmp6, n_points_final_grid & - , 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num) - - deallocate(tmp5) - deallocate(tmp6) - - - allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, l, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1) - !$OMP DO COLLAPSE(2) - do i = 1, mo_num - do l = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & - , 0.d0, tmp_4d, mo_num*mo_num) - - deallocate(tmp1) - - !$OMP PARALLEL DO PRIVATE(i,j,k,m) - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) - enddo - enddo - enddo - enddo - !$OMP END PARALLEL DO - - deallocate(tmp_4d) - - - call wall_time(wall1) - print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0 - call print_memory_usage() - -END_PROVIDER - -! --- - diff --git a/src/bi_ort_ints/three_body_ijmk_n4.irp.f b/src/bi_ort_ints/three_body_ijmk_n4.irp.f new file mode 100644 index 00000000..157b70f4 --- /dev/null +++ b/src/bi_ort_ints/three_body_ijmk_n4.irp.f @@ -0,0 +1,484 @@ + +! --- + + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm + ! + END_DOC + + implicit none + integer :: ipoint, i, j, k, l, m + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) + double precision, allocatable :: tmp_4d(:,:,:,:) + double precision, allocatable :: tmp4(:,:,:) + double precision, allocatable :: tmp5(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_bi_ort ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp2(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1, tmp2, tmp3) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint) + + tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i) + tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i) + + tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l) + tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = -tmp_4d(m,i,j,k) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp2) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, tmp3(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp3) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp5(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, & + !$OMP tmp5) + !$OMP DO + do i = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + allocate(tmp4(n_points_final_grid,mo_num,mo_num)) + + do m = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP int2_grad1_u12_bimo_t, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & + + int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & + + int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & + , tmp5(1,1), n_points_final_grid, tmp4(1,1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, k, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, m, & + !$OMP mos_l_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do k = 1, mo_num + do j = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & + , 0.d0, tmp_3d, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo + + deallocate(tmp5) + deallocate(tmp_3d) + + + + do i = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (m, j, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, i, & + !$OMP mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp4) + !$OMP DO COLLAPSE(2) + do j = 1, mo_num + do m = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + , 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num) + + enddo + + deallocate(tmp4) + + +! !$OMP PARALLEL DO PRIVATE(i,j,k,m) +! do i = 1, mo_num +! do k = 1, mo_num +! do j = 1, mo_num +! do m = 1, mo_num +! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_bi_ort', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign + ! + ! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki + ! + END_DOC + + implicit none + integer :: i, j, k, l, m, ipoint + double precision :: wall1, wall0 + double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:) + double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:) + + print *, ' Providing the O(N^4) three_e_4_idx_exch23_bi_ort_n4 ...' + call wall_time(wall0) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + + + allocate(tmp5(n_points_final_grid,mo_num,mo_num)) + allocate(tmp6(n_points_final_grid,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp5, tmp6) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) & + + int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) & + + int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l) + + tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 & + , tmp5(1,1,1), n_points_final_grid, tmp6(1,1,1), n_points_final_grid & + , 0.d0, three_e_4_idx_exch23_bi_ort_n4(1,1,1,1), mo_num*mo_num) + + deallocate(tmp5) + deallocate(tmp6) + + + allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, l, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp1) + !$OMP DO COLLAPSE(2) + do i = 1, mo_num + do l = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1) + + !$OMP PARALLEL DO PRIVATE(i,j,k,m) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(tmp_4d) + + + call wall_time(wall1) + print *, ' wall time for O(N^4) three_e_4_idx_exch23_bi_ort_n4', wall1 - wall0 + call print_memory_usage() + +END_PROVIDER + +! --- + From ba65e672166d5f9f41cebdf28b05f26f3adfef61 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 9 Jun 2023 22:05:55 +0200 Subject: [PATCH 165/337] 4-idx tensors seems to be correct --- src/bi_ort_ints/three_body_ijmk.irp.f | 2 +- src/bi_ort_ints/three_body_ijmk_n4.irp.f | 38 +++++++++++++----------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 0d466f9f..669861b7 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -194,7 +194,7 @@ tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n) tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n) tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n) - tmp2(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) enddo enddo !$OMP END DO diff --git a/src/bi_ort_ints/three_body_ijmk_n4.irp.f b/src/bi_ort_ints/three_body_ijmk_n4.irp.f index 157b70f4..e3faeff0 100644 --- a/src/bi_ort_ints/three_body_ijmk_n4.irp.f +++ b/src/bi_ort_ints/three_body_ijmk_n4.irp.f @@ -1,11 +1,11 @@ ! --- - BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] -&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)] -!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] +!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -13,11 +13,11 @@ ! ! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) + ! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) ! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO - ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) + ! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO + ! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) ! ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign ! @@ -77,6 +77,7 @@ !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) @@ -97,7 +98,6 @@ , tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) - deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -133,10 +133,12 @@ !$OMP END DO !$OMP END PARALLEL + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num) + deallocate(tmp2) !$OMP PARALLEL DO PRIVATE(i,j,k,m) @@ -202,7 +204,7 @@ do k = 1, mo_num do j = 1, mo_num do m = 1, mo_num - three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) + three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i) enddo enddo enddo @@ -294,9 +296,9 @@ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & - , 0.d0, tmp_3d, mo_num*mo_num) + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) !$OMP PARALLEL DO PRIVATE(i,j,k) do i = 1, mo_num @@ -339,8 +341,8 @@ !$OMP END DO !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & - , tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & + , tmp4(1,1,1), n_points_final_grid, mos_l_in_r_array_transp(1,1), n_points_final_grid & , 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num) enddo @@ -353,8 +355,8 @@ ! do k = 1, mo_num ! do j = 1, mo_num ! do m = 1, mo_num -! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) -! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) +! three_e_4_idx_exch12_bi_ort_n4 (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i) +! three_e_4_idx_cycle_2_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i) ! enddo ! enddo ! enddo From 6e31ca280d5a11db7b09c5fa04e2f36a7d11c39f Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 10:42:32 +0200 Subject: [PATCH 166/337] // in Norm_Ord --- .../normal_ordered_contractions.irp.f | 289 +++++++++++++++++- 1 file changed, 280 insertions(+), 9 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 855cfd17..6f70516d 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aba_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -16,7 +16,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) double precision, allocatable :: tmp_2d(:,:) - print*,' Providing no_aba_contraction ...' + print*,' Providing no_aba_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -102,7 +102,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -153,7 +153,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -220,7 +220,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -270,7 +270,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aba_contraction_v0(p2,h2,p1,h1) = no_aba_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -285,11 +285,11 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) - no_aba_contraction = -0.5d0 * no_aba_contraction - call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + no_aba_contraction_v0 = -0.5d0 * no_aba_contraction_v0 + call sum_A_At(no_aba_contraction_v0(1,1,1,1), mo_num*mo_num) call wall_time(wall1) - print*,' Wall time for no_aba_contraction', wall1-wall0 + print*,' Wall time for no_aba_contraction_v0', wall1-wall0 END_PROVIDER @@ -1060,3 +1060,274 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aba_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p1, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & + !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) + !$OMP DO + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(p1,h2,p2) + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + do p1 = 1, mo_num + + ! to minimize the number of operations + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint) & + !$OMP SHARED (n_points_final_grid, i, h1, p1, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmpval_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h2, ipoint) & + !$OMP SHARED (mo_num, n_points_final_grid, & + !$OMP mos_r_in_r_array_transp, & + !$OMP tmpval_1, tmp2) + !$OMP DO + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + !$OMP PARALLEL DO PRIVATE(h2,p2) + do h2 = 1, mo_num + do p2 = 1, mo_num + no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + !$OMP END PARALLEL DO + + enddo ! p1 + enddo ! h1 + enddo !i + endif + + deallocate(tmp_2d, tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + no_aba_contraction = -0.5d0 * no_aba_contraction + call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for no_aba_contraction', wall1-wall0 + +END_PROVIDER From d9921922fc00efd0146aa5669219c15bb0c408e9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:24:06 +0200 Subject: [PATCH 167/337] NO aba // ok --- .../normal_ordered_contractions.irp.f | 118 +++++++++--------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 44 ++++++- 2 files changed, 101 insertions(+), 61 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 6f70516d..980181e7 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -1104,12 +1104,20 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP no_aba_contraction) - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + !$OMP DO do ii = 1, Ne(2) @@ -1147,7 +1155,9 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL enddo enddo enddo @@ -1177,7 +1187,9 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL enddo enddo @@ -1195,28 +1207,40 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP END PARALLEL - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - - ! purely open-shell part if(Ne(2) < Ne(1)) then + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aba_contraction) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) do h1 = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) @@ -1227,16 +1251,7 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO do p1 = 1, mo_num do ipoint = 1, n_points_final_grid tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & @@ -1247,82 +1262,65 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL enddo enddo enddo - !$OMP END PARALLEL DO do p1 = 1, mo_num - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO do h2 = 1, mo_num do ipoint = 1, n_points_final_grid tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & , mos_l_in_r_array_transp(1,1), n_points_final_grid & , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num + !$OMP CRITICAL no_aba_contraction(p2,h2,p1,h1) = no_aba_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL enddo enddo - !$OMP END PARALLEL DO enddo ! p1 enddo ! h1 enddo !i - endif + !$OMP END DO - deallocate(tmp_2d, tmp_3d) - deallocate(tmp1, tmp2) - deallocate(tmpval_1, tmpval_2) - deallocate(tmpvec_1, tmpvec_2) + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif no_aba_contraction = -0.5d0 * no_aba_contraction call sum_A_At(no_aba_contraction(1,1,1,1), mo_num*mo_num) diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 33b5c5aa..a3cb1692 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -18,7 +18,8 @@ program tc_bi_ortho ! call timing_single ! call timing_double - call test_no() + !call test_no() + call test_no_aba() end subroutine test_h_u0 @@ -297,4 +298,45 @@ end ! --- +subroutine test_no_aba() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aba_contraction ...' + + thr = 1d-8 + + PROVIDE no_aba_contraction_v0 + PROVIDE no_aba_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aba_contraction (l,k,j,i) + ref = no_aba_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aba_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aba_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + From 92a72a096840c829d9ae5bb8ec0d683bc62ec0d9 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:38:41 +0200 Subject: [PATCH 168/337] no aab // --- .../normal_ordered_contractions.irp.f | 210 +++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 43 ++++ 2 files changed, 243 insertions(+), 10 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index 980181e7..f066c958 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -295,7 +295,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aab_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] use bitmasks ! you need to include the bitmasks_module.f90 features @@ -310,7 +310,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) double precision, allocatable :: tmp_2d(:,:) - print*,' Providing no_aab_contraction ...' + print*,' Providing no_aab_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -387,7 +387,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -435,7 +435,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aab_contraction_v0(p2,h2,p1,h1) = no_aab_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -449,19 +449,19 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1) deallocate(tmpvec_1) - no_aab_contraction = -0.5d0 * no_aab_contraction + no_aab_contraction_v0 = -0.5d0 * no_aab_contraction_v0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aab_contraction, mo_num) + !$OMP SHARED (no_aab_contraction_v0, mo_num) !$OMP DO do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num do p2 = p1, mo_num - no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + no_aab_contraction_v0(p2,h2,p1,h1) -= no_aab_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -473,7 +473,7 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + no_aab_contraction_v0(p2,h2,p1,h1) = -no_aab_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -485,15 +485,16 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = h1+1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + no_aab_contraction_v0(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL call wall_time(wall1) - print*,' Wall time for no_aab_contraction', wall1-wall0 + print*,' Wall time for no_aab_contraction_v0', wall1-wall0 END_PROVIDER @@ -1329,3 +1330,192 @@ BEGIN_PROVIDER [ double precision, no_aba_contraction, (mo_num,mo_num,mo_num,mo_ print*,' Wall time for no_aba_contraction', wall1-wall0 END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmp_2d(:,:) + + print*,' Providing no_aab_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aab_contraction) + + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aab_contraction(p2,h2,p1,h1) = no_aab_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + no_aab_contraction = -0.5d0 * no_aab_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aab_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aab_contraction(p2,h2,p1,h1) -= no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) = -no_aab_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aab_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for no_aab_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index a3cb1692..4f190407 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -20,6 +20,7 @@ program tc_bi_ortho !call test_no() call test_no_aba() + call test_no_aab() end subroutine test_h_u0 @@ -340,3 +341,45 @@ end ! --- +subroutine test_no_aab() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aab_contraction ...' + + thr = 1d-8 + + PROVIDE no_aab_contraction_v0 + PROVIDE no_aab_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aab_contraction (l,k,j,i) + ref = no_aab_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aab_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aab_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- + + From caa11f20ea4a9aa812e6bc0c6dcd2faa3e0d485b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 10 Jun 2023 11:56:07 +0200 Subject: [PATCH 169/337] Fixed singles when no beta exc --- scripts/compilation/cache_compile.py | 2 +- src/cipsi/selection.irp.f | 425 ++++----------------------- src/cipsi/selection_old.irp.f | 350 ++++++++++++++++++++++ src/cipsi/selection_singles.irp.f | 356 ++++++++++++++++++++++ src/ezfio_files/00.create.bats | 28 ++ src/fci/40.fci.bats | 113 ++++--- src/hartree_fock/10.hf.bats | 29 ++ src/tools/print_wf.irp.f | 1 + tests/input/h2_1.xyz | 6 + tests/input/h2_3.xyz | 6 + tests/input/h3_2.xyz | 7 + tests/input/h3_4.xyz | 7 + tests/input/h4_1.xyz | 7 + tests/input/h4_3.xyz | 7 + tests/input/h4_5.xyz | 7 + 15 files changed, 947 insertions(+), 404 deletions(-) create mode 100644 src/cipsi/selection_old.irp.f create mode 100644 src/cipsi/selection_singles.irp.f create mode 100644 tests/input/h2_1.xyz create mode 100644 tests/input/h2_3.xyz create mode 100644 tests/input/h3_2.xyz create mode 100644 tests/input/h3_4.xyz create mode 100644 tests/input/h4_1.xyz create mode 100644 tests/input/h4_3.xyz create mode 100644 tests/input/h4_5.xyz diff --git a/scripts/compilation/cache_compile.py b/scripts/compilation/cache_compile.py index 440f6498..473976e7 100755 --- a/scripts/compilation/cache_compile.py +++ b/scripts/compilation/cache_compile.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Save the .o from a .f90 -and is the .o is asked a second time, retur it +and is the .o is asked a second time, return it Take in argv command like: ifort -g -openmp -I IRPF90_temp/Ezfio_files/ -c IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.F90 -o IRPF90_temp/Integrals_Monoelec/kin_ao_ints.irp.module.o """ diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 0705d103..b8fa2895 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -88,6 +88,10 @@ subroutine select_connected(i_generator,E0,pt2_data,b,subset,csubset) particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) ) particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) ) enddo + if ((subset == 1).and.(sum(hole_mask(:,2)) == 0_bit_kind)) then + ! No beta electron to excite + call select_singles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b) + endif call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,b,subset,csubset) deallocate(fock_diag_tmp) end subroutine @@ -142,7 +146,7 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d use selection_types implicit none BEGIN_DOC -! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted +! WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted END_DOC integer, intent(in) :: i_generator, subset, csubset @@ -237,7 +241,6 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d enddo ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4 - ! Remove also contributions < 1.d-20) do j=1,N_det_alpha_unique call get_excitation_degree_spin(psi_det_alpha_unique(1,j), & psi_det_generators(1,1,i_generator), nt, N_int) @@ -480,7 +483,9 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d do s2=s1,2 sp = s1 - if(s1 /= s2) sp = 3 + if(s1 /= s2) then + sp = 3 + endif ib = 1 if(s1 == s2) ib = i1+1 @@ -528,7 +533,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d deallocate(preinteresting, prefullinteresting, interesting, fullinteresting) deallocate(banned, bannedOrb,mat) end subroutine -subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + +BEGIN_TEMPLATE + +subroutine fill_buffer_$DOUBLE(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) use bitmasks use selection_types implicit none @@ -562,7 +570,20 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d s1 = sp s2 = sp end if - call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + + if ($IS_DOUBLE) then + if (h2 == 0) then + print *, 'h2=0 in '//trim(irp_here) + stop + endif + call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int) + else + if (h2 /= 0) then + print *, 'h2 /= in '//trim(irp_here) + stop + endif + call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, mask, ok, N_int) + endif E_shift = 0.d0 if (h0_type == 'CFG') then @@ -570,12 +591,15 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j) endif - do p1=1,mo_num - if(bannedOrb(p1, s1)) cycle + $DO_p1 +! do p1=1,mo_num + + if (bannedOrb(p1, s1)) cycle ib = 1 if(sp /= 3) ib = p1+1 - do p2=ib,mo_num + $DO_p2 + ! do p2=ib,mo_num ! ----- ! /!\ Generating only single excited determinants doesn't work because a @@ -584,9 +608,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! detected as already generated when generating in the future with a ! double excitation. ! ----- - - if(bannedOrb(p2, s2)) cycle - if(banned(p1,p2)) cycle + if ($IS_DOUBLE) then + if(bannedOrb(p2, s2)) cycle + if(banned(p1,p2)) cycle + endif if(pseudo_sym)then if(dabs(mat(1, p1, p2)).lt.thresh_sym)then @@ -596,7 +621,11 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d val = maxval(abs(mat(1:N_states, p1, p2))) if( val == 0d0) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + if ($IS_DOUBLE) then + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + else + call apply_particle(mask, s1, p1, det, ok, N_int) + endif if (do_only_cas) then integer, external :: number_of_holes, number_of_particles @@ -797,7 +826,7 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d case(5) ! Variance selection if (h0_type == 'CFG') then - w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & + w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & / c0_weight(istate) else w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) @@ -857,10 +886,19 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if(w <= buf%mini) then call add_to_selection_buffer(buf, det, w) end if - end do - end do + ! enddo + $ENDDO_p1 +! enddo + $ENDDO_p2 end +SUBST [ DOUBLE , DO_p1 , ENDDO_p1 , DO_p2 , ENDDO_p2 , IS_DOUBLE ] + +double ; do p1=1,mo_num ; enddo ; do p2=ib,mo_num ; enddo ; .True. ;; +single ; do p1=1,mo_num ; enddo ; p2=1 ; ; .False. ;; + +END_TEMPLATE + subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting) use bitmasks implicit none @@ -882,6 +920,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere PROVIDE psi_selectors_coef_transp psi_det_sorted mat = 0d0 + p=0 do i=1,N_int negMask(i,1) = not(mask(i,1)) @@ -1435,7 +1474,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) p1 = p(1,sp) p2 = p(2,sp) do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle + if (bannedOrb(puti, sp)) cycle call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map) call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map) do putj=puti+1, mo_num @@ -1446,7 +1485,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) call i_h_j(gen, det, N_int, hij) if (hij == 0.d0) cycle else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj)) + hij = hij_cache1(putj) - hij_cache2(putj) if (hij == 0.d0) cycle hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) end if @@ -1506,7 +1545,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) use bitmasks implicit none BEGIN_DOC -! Identify the determinants in det which are in the internal space. These are +! Identify the determinants in det that are in the internal space. These are ! the determinants that can be produced by creating two particles on the mask. END_DOC @@ -1534,7 +1573,7 @@ subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting) if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl end do - ! If det(i) < det(i_gen), it hs already been considered + ! If det(i) < det(i_gen), it has already been considered if(interesting(i) < i_gen) then fullMatch = .true. return @@ -1585,352 +1624,4 @@ end -! OLD unoptimized routines for debugging -! ====================================== - -subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - integer :: i, j, s, h1, h2, p1, p2, puti, putj - double precision :: hij, phase - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - integer :: bant - bant = 1 - - - if(sp == 3) then ! AB - h1 = p(1,1) - h2 = p(1,2) - do p1=1, mo_num - if(bannedOrb(p1, 1)) cycle - do p2=1, mo_num - if(bannedOrb(p2,2)) cycle - if(banned(p1, p2, bant)) cycle ! rentable? - if(p1 == h1 .or. p2 == h2) then - call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) - hij = mo_two_e_integral(p1, p2, h1, h2) * phase - end if - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do - else ! AA BB - p1 = p(1,sp) - p2 = p(2,sp) - do puti=1, mo_num - if(bannedOrb(puti, sp)) cycle - do putj=puti+1, mo_num - if(bannedOrb(putj, sp)) cycle - if(banned(puti, putj, bant)) cycle ! rentable? - if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then - call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - else - hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) - end if - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if -end - -subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(N_int,2) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - integer(bit_kind) :: det(N_int, 2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) - double precision, external :: get_phase_bi, mo_two_e_integral - logical :: ok - - logical, allocatable :: lbanned(:,:) - integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j - integer :: hfix, pfix, h1, h2, p1, p2, ib - - integer, parameter :: turn2(2) = (/2,1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - - - allocate (lbanned(mo_num, 2)) - lbanned = bannedOrb - - do i=1, p(0,1) - lbanned(p(i,1), 1) = .true. - end do - do i=1, p(0,2) - lbanned(p(i,2), 2) = .true. - end do - - ma = 1 - if(p(0,2) >= 2) ma = 2 - mi = turn2(ma) - - bant = 1 - - if(sp == 3) then - !move MA - if(ma == 2) bant = 2 - puti = p(1,mi) - hfix = h(1,ma) - p1 = p(1,ma) - p2 = p(2,ma) - if(.not. bannedOrb(puti, mi)) then - tmp_row = 0d0 - do putj=1, hfix-1 - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - do putj=hfix+1, mo_num - if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) - end do - - if(ma == 1) then - mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) - else - mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) - end if - end if - - !MOVE MI - pfix = p(1,mi) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,mi)) cycle - !p1 fixed - putj = p1 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p2 - if(.not. banned(putj,puti,bant)) then - hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - - if(mi == 1) then - mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) - mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) - else - mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) - mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) - end if - else - if(p(0,ma) == 3) then - do i=1,3 - hfix = h(1,ma) - puti = p(i, ma) - p1 = p(turn3(1,i), ma) - p2 = p(turn3(2,i), ma) - tmp_row = 0d0 - do putj=1,hfix-1 - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - do putj=hfix+1,mo_num - if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle - hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) - end do - - mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) - mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) - end do - else - hfix = h(1,mi) - pfix = p(1,mi) - p1 = p(1,ma) - p2 = p(2,ma) - tmp_row = 0d0 - tmp_row2 = 0d0 - do puti=1,mo_num - if(lbanned(puti,ma)) cycle - putj = p2 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) - tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) - end if - - putj = p1 - if(.not. banned(puti,putj,1)) then - hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) - tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) - end if - end do - mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) - mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) - mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) - mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) - end if - end if - deallocate(lbanned) - - !! MONO - if(sp == 3) then - s1 = 1 - s2 = 2 - else - s1 = sp - s2 = sp - end if - - do i1=1,p(0,s1) - ib = 1 - if(s1 == s2) ib = i1+1 - do i2=ib,p(0,s2) - p1 = p(i1,s1) - p2 = p(i2,s2) - if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle - call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) - call i_h_j(gen, det, N_int, hij) - mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij - end do - end do -end - -subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) - use bitmasks - implicit none - - integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) - integer(bit_kind), intent(in) :: phasemask(2,N_int) - logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) - double precision, intent(in) :: coefs(N_states) - double precision, intent(inout) :: mat(N_states, mo_num, mo_num) - integer, intent(in) :: h(0:2,2), p(0:4,2), sp - - double precision, external :: get_phase_bi, mo_two_e_integral - - integer :: i, j, tip, ma, mi, puti, putj - integer :: h1, h2, p1, p2, i1, i2 - double precision :: hij, phase - - integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) - integer, parameter :: turn2(2) = (/2, 1/) - integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) - - integer :: bant - bant = 1 - - tip = p(0,1) * p(0,2) - - ma = sp - if(p(0,1) > p(0,2)) ma = 1 - if(p(0,1) < p(0,2)) ma = 2 - mi = mod(ma, 2) + 1 - - if(sp == 3) then - if(ma == 2) bant = 2 - - if(tip == 3) then - puti = p(1, mi) - do i = 1, 3 - putj = p(i, ma) - if(banned(putj,puti,bant)) cycle - i1 = turn3(1,i) - i2 = turn3(2,i) - p1 = p(i1, ma) - p2 = p(i2, ma) - h1 = h(1, ma) - h2 = h(2, ma) - - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) - if(ma == 1) then - mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij - else - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end do - else - h1 = h(1,1) - h2 = h(1,2) - do j = 1,2 - putj = p(j, 2) - p2 = p(turn2(j), 2) - do i = 1,2 - puti = p(i, 1) - - if(banned(puti,putj,bant)) cycle - p1 = p(turn2(i), 1) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - end if - - else - if(tip == 0) then - h1 = h(1, ma) - h2 = h(2, ma) - do i=1,3 - puti = p(i, ma) - do j=i+1,4 - putj = p(j, ma) - if(banned(puti,putj,1)) cycle - - i1 = turn2d(1, i, j) - i2 = turn2d(2, i, j) - p1 = p(i1, ma) - p2 = p(i2, ma) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end do - end do - else if(tip == 3) then - h1 = h(1, mi) - h2 = h(1, ma) - p1 = p(1, mi) - do i=1,3 - puti = p(turn3(1,i), ma) - putj = p(turn3(2,i), ma) - if(banned(puti,putj,1)) cycle - p2 = p(i, ma) - - hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) - mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij - end do - else ! tip == 4 - puti = p(1, sp) - putj = p(2, sp) - if(.not. banned(puti,putj,1)) then - p1 = p(1, mi) - p2 = p(2, mi) - h1 = h(1, mi) - h2 = h(2, mi) - hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) - mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij - end if - end if - end if -end - diff --git a/src/cipsi/selection_old.irp.f b/src/cipsi/selection_old.irp.f new file mode 100644 index 00000000..8fd5bc2b --- /dev/null +++ b/src/cipsi/selection_old.irp.f @@ -0,0 +1,350 @@ + +! OLD unoptimized routines for debugging +! ====================================== + +subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + integer :: i, j, s, h1, h2, p1, p2, puti, putj + double precision :: hij, phase + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + integer :: bant + bant = 1 + + + if(sp == 3) then ! AB + h1 = p(1,1) + h2 = p(1,2) + do p1=1, mo_num + if(bannedOrb(p1, 1)) cycle + do p2=1, mo_num + if(bannedOrb(p2,2)) cycle + if(banned(p1, p2, bant)) cycle ! rentable? + if(p1 == h1 .or. p2 == h2) then + call apply_particles(mask, 1,p1,2,p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) + hij = mo_two_e_integral(p1, p2, h1, h2) * phase + end if + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do + else ! AA BB + p1 = p(1,sp) + p2 = p(2,sp) + do puti=1, mo_num +! do not cycle here? otherwise singles will be missed?? + if(bannedOrb(puti, sp)) cycle + do putj=puti+1, mo_num + if(bannedOrb(putj, sp)) cycle + if(banned(puti, putj, bant)) cycle ! rentable? + if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then + call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + else + hij = (mo_two_e_integral(p1, p2, puti, putj) - mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int) + end if + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if +end + +subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int,2) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + integer(bit_kind) :: det(N_int, 2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + double precision :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num) + double precision, external :: get_phase_bi, mo_two_e_integral + logical :: ok + + logical, allocatable :: lbanned(:,:) + integer :: puti, putj, ma, mi, s1, s2, i, i1, i2, j + integer :: hfix, pfix, h1, h2, p1, p2, ib + + integer, parameter :: turn2(2) = (/2,1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + + + allocate (lbanned(mo_num, 2)) + lbanned = bannedOrb + + do i=1, p(0,1) + lbanned(p(i,1), 1) = .true. + end do + do i=1, p(0,2) + lbanned(p(i,2), 2) = .true. + end do + + ma = 1 + if(p(0,2) >= 2) ma = 2 + mi = turn2(ma) + + bant = 1 + + if(sp == 3) then + !move MA + if(ma == 2) bant = 2 + puti = p(1,mi) + hfix = h(1,ma) + p1 = p(1,ma) + p2 = p(2,ma) + if(.not. bannedOrb(puti, mi)) then + tmp_row = 0d0 + do putj=1, hfix-1 + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + do putj=hfix+1, mo_num + if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states) + end do + + if(ma == 1) then + mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num) + else + mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num) + end if + end if + + !MOVE MI + pfix = p(1,mi) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,mi)) cycle + !p1 fixed + putj = p1 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p2 + if(.not. banned(putj,puti,bant)) then + hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + + if(mi == 1) then + mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:) + mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:) + else + mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:) + mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:) + end if + else + if(p(0,ma) == 3) then + do i=1,3 + hfix = h(1,ma) + puti = p(i, ma) + p1 = p(turn3(1,i), ma) + p2 = p(turn3(2,i), ma) + tmp_row = 0d0 + do putj=1,hfix-1 + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + do putj=hfix+1,mo_num + if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle + hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) + tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:) + end do + + mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1) + mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:) + end do + else + hfix = h(1,mi) + pfix = p(1,mi) + p1 = p(1,ma) + p2 = p(2,ma) + tmp_row = 0d0 + tmp_row2 = 0d0 + do puti=1,mo_num + if(lbanned(puti,ma)) cycle + putj = p2 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) + tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:) + end if + + putj = p1 + if(.not. banned(puti,putj,1)) then + hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) + tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:) + end if + end do + mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1) + mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:) + mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1) + mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:) + end if + end if + deallocate(lbanned) + + !! MONO + if(sp == 3) then + s1 = 1 + s2 = 2 + else + s1 = sp + s2 = sp + end if + + do i1=1,p(0,s1) + ib = 1 + if(s1 == s2) ib = i1+1 + do i2=ib,p(0,s2) + p1 = p(i1,s1) + p2 = p(i2,s2) + if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle + call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij + end do + end do +end + +subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(2,N_int) + logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: mat(N_states, mo_num, mo_num) + integer, intent(in) :: h(0:2,2), p(0:4,2), sp + + double precision, external :: get_phase_bi, mo_two_e_integral + + integer :: i, j, tip, ma, mi, puti, putj + integer :: h1, h2, p1, p2, i1, i2 + double precision :: hij, phase + + integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0, 3,4, 0,0, 0,0, 2,4, 1,4, 0,0, 2,3, 1,3, 1,2 /), (/2,3,4/)) + integer, parameter :: turn2(2) = (/2, 1/) + integer, parameter :: turn3(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + + integer :: bant + bant = 1 + + tip = p(0,1) * p(0,2) + + ma = sp + if(p(0,1) > p(0,2)) ma = 1 + if(p(0,1) < p(0,2)) ma = 2 + mi = mod(ma, 2) + 1 + + if(sp == 3) then + if(ma == 2) bant = 2 + + if(tip == 3) then + puti = p(1, mi) + do i = 1, 3 + putj = p(i, ma) + if(banned(putj,puti,bant)) cycle + i1 = turn3(1,i) + i2 = turn3(2,i) + p1 = p(i1, ma) + p2 = p(i2, ma) + h1 = h(1, ma) + h2 = h(2, ma) + + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) + if(ma == 1) then + mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij + else + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end do + else + h1 = h(1,1) + h2 = h(1,2) + do j = 1,2 + putj = p(j, 2) + p2 = p(turn2(j), 2) + do i = 1,2 + puti = p(i, 1) + + if(banned(puti,putj,bant)) cycle + p1 = p(turn2(i), 1) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + end if + + else + if(tip == 0) then + h1 = h(1, ma) + h2 = h(2, ma) + do i=1,3 + puti = p(i, ma) + do j=i+1,4 + putj = p(j, ma) + if(banned(puti,putj,1)) cycle + + i1 = turn2d(1, i, j) + i2 = turn2d(2, i, j) + p1 = p(i1, ma) + p2 = p(i2, ma) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end do + end do + else if(tip == 3) then + h1 = h(1, mi) + h2 = h(1, ma) + p1 = p(1, mi) + do i=1,3 + puti = p(turn3(1,i), ma) + putj = p(turn3(2,i), ma) + if(banned(puti,putj,1)) cycle + p2 = p(i, ma) + + hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int) + mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij + end do + else ! tip == 4 + puti = p(1, sp) + putj = p(2, sp) + if(.not. banned(puti,putj,1)) then + p1 = p(1, mi) + p2 = p(2, mi) + h1 = h(1, mi) + h2 = h(2, mi) + hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int) + mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij + end if + end if + end if +end + diff --git a/src/cipsi/selection_singles.irp.f b/src/cipsi/selection_singles.irp.f new file mode 100644 index 00000000..3821576c --- /dev/null +++ b/src/cipsi/selection_singles.irp.f @@ -0,0 +1,356 @@ +use bitmasks + +subroutine select_singles(i_gen,hole_mask,particle_mask,fock_diag_tmp,E0,pt2_data,buf) + use bitmasks + use selection_types + implicit none + BEGIN_DOC +! Select determinants connected to i_det by H + END_DOC + integer, intent(in) :: i_gen + integer(bit_kind), intent(in) :: hole_mask(N_int,2), particle_mask(N_int,2) + double precision, intent(in) :: fock_diag_tmp(mo_num) + double precision, intent(in) :: E0(N_states) + type(pt2_type), intent(inout) :: pt2_data + type(selection_buffer), intent(inout) :: buf + + logical, allocatable :: banned(:,:), bannedOrb(:) + double precision, allocatable :: mat(:,:,:) + integer :: i, j, k + integer :: h1,h2,s1,s2,i1,i2,ib,sp + integer(bit_kind) :: hole(N_int,2), particle(N_int,2), mask(N_int, 2) + logical :: fullMatch, ok + + + do k=1,N_int + hole (k,1) = iand(psi_det_generators(k,1,i_gen), hole_mask(k,1)) + hole (k,2) = iand(psi_det_generators(k,2,i_gen), hole_mask(k,2)) + particle(k,1) = iand(not(psi_det_generators(k,1,i_gen)), particle_mask(k,1)) + particle(k,2) = iand(not(psi_det_generators(k,2,i_gen)), particle_mask(k,2)) + enddo + + allocate(banned(mo_num,mo_num), bannedOrb(mo_num), mat(N_states, mo_num, 1)) + banned = .False. + + ! Create lists of holes and particles + ! ----------------------------------- + + integer :: N_holes(2), N_particles(2) + integer :: hole_list(N_int*bit_kind_size,2) + integer :: particle_list(N_int*bit_kind_size,2) + + call bitstring_to_list_ab(hole , hole_list , N_holes , N_int) + call bitstring_to_list_ab(particle, particle_list, N_particles, N_int) + + do sp=1,2 + do i=1, N_holes(sp) + h1 = hole_list(i,sp) + call apply_hole(psi_det_generators(1,1,i_gen), sp, h1, mask, ok, N_int) + bannedOrb = .true. + do j=1,N_particles(sp) + bannedOrb(particle_list(j, sp)) = .false. + end do + call spot_hasBeen(mask, sp, psi_det_sorted, i_gen, N_det, bannedOrb, fullMatch) + if(fullMatch) cycle + mat = 0d0 + call splash_p(mask, sp, psi_selectors(1,1,i_gen), psi_selectors_coef_transp(1,i_gen), N_det_selectors - i_gen + 1, bannedOrb, mat(1,1,1)) + call fill_buffer_single(i_gen, sp, h1, 0, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf) + end do + enddo +end subroutine + + +subroutine spot_hasBeen(mask, sp, det, i_gen, N, banned, fullMatch) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N) + integer, intent(in) :: i_gen, N, sp + logical, intent(inout) :: banned(mo_num) + logical, intent(out) :: fullMatch + + + integer :: i, j, na, nb, list(3), nt + integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2) + + fullMatch = .false. + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + genl : do i=1, N + nt = 0 + + do j=1, N_int + myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1)) + myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2)) + nt += popcnt(myMask(j, 1)) + popcnt(myMask(j, 2)) + end do + + if(nt > 3) cycle + + if(nt <= 2 .and. i < i_gen) then + fullMatch = .true. + return + end if + + call bitstring_to_list(myMask(1,sp), list(1), na, N_int) + + if(nt == 3 .and. i < i_gen) then + do j=1,na + banned(list(j)) = .true. + end do + else if(nt == 1 .and. na == 1) then + banned(list(1)) = .true. + end if + end do genl +end subroutine + + +subroutine splash_p(mask, sp, det, coefs, N_sel, bannedOrb, vect) + use bitmasks + implicit none + + integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int,2,N_sel) + double precision, intent(in) :: coefs(N_states, N_sel) + integer, intent(in) :: sp, N_sel + logical, intent(inout) :: bannedOrb(mo_num) + double precision, intent(inout) :: vect(N_states, mo_num) + + integer :: i, j, h(0:2,2), p(0:3,2), nt + integer(bit_kind) :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2) + integer(bit_kind) :: phasemask(N_int, 2) + + do i=1,N_int + negMask(i,1) = not(mask(i,1)) + negMask(i,2) = not(mask(i,2)) + end do + + do i=1, N_sel + nt = 0 + do j=1,N_int + mobMask(j,1) = iand(negMask(j,1), det(j,1,i)) + mobMask(j,2) = iand(negMask(j,2), det(j,2,i)) + nt += popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) + end do + + if(nt > 3) cycle + + do j=1,N_int + perMask(j,1) = iand(mask(j,1), not(det(j,1,i))) + perMask(j,2) = iand(mask(j,2), not(det(j,2,i))) + end do + + call bitstring_to_list(perMask(1,1), h(1,1), h(0,1), N_int) + call bitstring_to_list(perMask(1,2), h(1,2), h(0,2), N_int) + + call bitstring_to_list(mobMask(1,1), p(1,1), p(0,1), N_int) + call bitstring_to_list(mobMask(1,2), p(1,2), p(0,2), N_int) + + call get_mask_phase(psi_det_sorted(1,1,i), phasemask, N_int) + + if(nt == 3) then + call get_m2(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else if(nt == 2) then + call get_m1(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + else + call get_m0(det(1,1,i), phasemask, bannedOrb, vect, mask, h, p, sp, coefs(1, i)) + end if + end do +end subroutine + + +subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, j, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti + double precision :: hij + double precision, external :: get_phase_bi, mo_two_e_integral + + integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/)) + integer, parameter :: turn2(2) = (/2,1/) + + if(h(0,sp) == 2) then + h1 = h(1, sp) + h2 = h(2, sp) + do i=1,3 + puti = p(i, sp) + if(bannedOrb(puti)) cycle + p1 = p(turn3_2(1,i), sp) + p2 = p(turn3_2(2,i), sp) + hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2) + hij *= get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end do + else if(h(0,sp) == 1) then + sfix = turn2(sp) + hfix = h(1,sfix) + pfix = p(1,sfix) + hmob = h(1,sp) + do j=1,2 + puti = p(j, sp) + if(bannedOrb(puti)) cycle + pmob = p(turn2(j), sp) + hij = mo_two_e_integral(pfix, pmob, hfix, hmob) + hij *= get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix) + vect(:, puti) += hij * coefs + end do + else + puti = p(1,sp) + if(.not. bannedOrb(puti)) then + sfix = turn2(sp) + p1 = p(1,sfix) + p2 = p(2,sfix) + h1 = h(1,sfix) + h2 = h(2,sfix) + hij = (mo_two_e_integral(p1,p2,h1,h2) - mo_two_e_integral(p2,p1,h1,h2)) + hij *= get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2) + vect(:, puti) += hij * coefs + end if + end if +end subroutine + +subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i, hole, p1, p2, sh + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + double precision, external :: get_phase_bi,mo_two_e_integral + + lbanned = bannedOrb + sh = 1 + if(h(0,2) == 1) sh = 2 + hole = h(1, sh) + lbanned(p(1,sp)) = .true. + if(p(0,sp) == 2) lbanned(p(2,sp)) = .true. + !print *, "SPm1", sp, sh + + p1 = p(1, sp) + + if(sp == sh) then + p2 = p(2, sp) + lbanned(p2) = .true. + + do i=1,hole-1 + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, i, hole) - mo_two_e_integral(p2, p1, i, hole)) + hij *= get_phase_bi(phasemask, sp, sp, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + do i=hole+1,mo_num + if(lbanned(i)) cycle + hij = (mo_two_e_integral(p1, p2, hole, i) - mo_two_e_integral(p2, p1, hole, i)) + hij *= get_phase_bi(phasemask, sp, sp, hole, p1, i, p2) + vect(:,i) += hij * coefs + end do + + call apply_particle(mask, sp, p2, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p2) += hij * coefs + else + p2 = p(1, sh) + do i=1,mo_num + if(lbanned(i)) cycle + hij = mo_two_e_integral(p1, p2, i, hole) + hij *= get_phase_bi(phasemask, sp, sh, i, p1, hole, p2) + vect(:,i) += hij * coefs + end do + end if + + call apply_particle(mask, sp, p1, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, p1) += hij * coefs +end subroutine + +subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs) + use bitmasks + implicit none + + integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2) + integer(bit_kind), intent(in) :: phasemask(N_int, 2) + logical, intent(in) :: bannedOrb(mo_num) + double precision, intent(in) :: coefs(N_states) + double precision, intent(inout) :: vect(N_states, mo_num) + integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2) + integer :: i + logical :: ok, lbanned(mo_num) + integer(bit_kind) :: det(N_int, 2) + double precision :: hij + + lbanned = bannedOrb + lbanned(p(1,sp)) = .true. + do i=1,mo_num + if(lbanned(i)) cycle + call apply_particle(mask, sp, i, det, ok, N_int) + call i_h_j(gen, det, N_int, hij) + vect(:, i) += hij * coefs + end do +end subroutine + + + +! +!subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0, pt2, vect, buf) +! use bitmasks +! use selection_types +! implicit none +! +! integer, intent(in) :: i_generator, sp, h1 +! double precision, intent(in) :: vect(N_states, mo_num) +! logical, intent(in) :: bannedOrb(mo_num) +! double precision, intent(in) :: fock_diag_tmp(mo_num) +! double precision, intent(in) :: E0(N_states) +! double precision, intent(inout) :: pt2(N_states) +! type(selection_buffer), intent(inout) :: buf +! logical :: ok +! integer :: s1, s2, p1, p2, ib, istate +! integer(bit_kind) :: mask(N_int, 2), det(N_int, 2) +! double precision :: e_pert, delta_E, val, Hii, max_e_pert, tmp +! double precision, external :: diag_H_mat_elem_fock +! +! +! call apply_hole(psi_det_generators(1,1,i_generator), sp, h1, mask, ok, N_int) +! +! do p1=1,mo_num +! if(bannedOrb(p1)) cycle +! if(vect(1, p1) == 0d0) cycle +! call apply_particle(mask, sp, p1, det, ok, N_int) +! +! +! Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int) +! max_e_pert = 0d0 +! +! do istate=1,N_states +! val = vect(istate, p1) + vect(istate, p1) +! delta_E = E0(istate) - Hii +! tmp = dsqrt(delta_E * delta_E + val * val) +! if (delta_E < 0.d0) then +! tmp = -tmp +! endif +! e_pert = 0.5d0 * ( tmp - delta_E) +! pt2(istate) += e_pert +! if(dabs(e_pert) > dabs(max_e_pert)) max_e_pert = e_pert +! end do +! +! if(dabs(max_e_pert) > buf%mini) call add_to_selection_buffer(buf, det, max_e_pert) +! end do +!end subroutine +! diff --git a/src/ezfio_files/00.create.bats b/src/ezfio_files/00.create.bats index cfa6247d..49430a0b 100644 --- a/src/ezfio_files/00.create.bats +++ b/src/ezfio_files/00.create.bats @@ -23,6 +23,34 @@ function run { qp set mo_two_e_ints io_mo_two_e_integrals "Write" } +@test "H2_1" { + run h2_1.xyz 1 0 cc-pvdz +} + +@test "H2_3" { + run h2_3.xyz 3 0 cc-pvdz +} + +@test "H3_2" { + run h3_2.xyz 2 0 cc-pvdz +} + +@test "H3_4" { + run h3_4.xyz 4 0 cc-pvdz +} + +@test "H4_1" { + run h4_1.xyz 1 0 cc-pvdz +} + +@test "H4_3" { + run h4_3.xyz 3 0 cc-pvdz +} + +@test "H4_5" { + run h4_5.xyz 5 0 cc-pvdz +} + @test "B-B" { qp set_file b2_stretched.ezfio diff --git a/src/fci/40.fci.bats b/src/fci/40.fci.bats index 4523d0e0..3c4a93c7 100644 --- a/src/fci/40.fci.bats +++ b/src/fci/40.fci.bats @@ -10,8 +10,8 @@ function run() { qp set perturbation do_pt2 False qp set determinants n_det_max 8000 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 8 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 8 qp run fci energy1="$(ezfio get fci energy | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh @@ -24,99 +24,134 @@ function run_stoch() { qp set perturbation do_pt2 True qp set determinants n_det_max $3 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 1 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 1 qp run fci energy1="$(ezfio get fci energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh } -@test "B-B" { +@test "H2_1" { # 1s + qp set_file h2_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.06415255 1.e-8 10000 +} + +@test "H2_3" { # 1s + qp set_file h2_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -0.96029881 1.e-8 10000 +} + +@test "H3_2" { # 3s + qp set_file h3_2.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.61003132 1.e-8 10000 +} + +@test "H3_4" { # 2s + qp set_file h3_4.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.02434843 1.e-8 10000 +} + +@test "H4_1" { # 13s + qp set_file h4_1.ezfio + qp set perturbation pt2_max 0. + run_stoch -2.01675062 1.e-8 10000 +} + +@test "H4_3" { # 10s + qp set_file h4_3.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.95927626 1.e-8 10000 +} + +@test "H4_5" { # 3s + qp set_file h4_5.ezfio + qp set perturbation pt2_max 0. + run_stoch -1.25852765 1.e-8 10000 +} + +@test "B-B" { # 10s qp set_file b2_stretched.ezfio qp set determinants n_det_max 10000 qp set_frozen_core run_stoch -49.14103054419 3.e-4 10000 } -@test "F2" { # 4.07m - [[ -n $TRAVIS ]] && skip - qp set_file f2.ezfio - qp set_frozen_core - run_stoch -199.304922384814 3.e-3 100000 -} - -@test "NH3" { # 10.6657s +@test "NH3" { # 8s qp set_file nh3.ezfio qp set_mo_class --core="[1-4]" --act="[5-72]" run -56.244753429144986 3.e-4 100000 } -@test "DHNO" { # 11.4721s +@test "DHNO" { # 8s qp set_file dhno.ezfio qp set_mo_class --core="[1-7]" --act="[8-64]" - run -130.459020029816 3.e-4 100000 + run -130.466208113547 3.e-4 100000 } -@test "HCO" { # 12.2868s +@test "HCO" { # 32s qp set_file hco.ezfio - run -113.393356604085 1.e-3 100000 + run -113.395751656985 1.e-3 100000 } -@test "H2O2" { # 12.9214s +@test "H2O2" { # 21s qp set_file h2o2.ezfio qp set_mo_class --core="[1-2]" --act="[3-24]" --del="[25-38]" run -151.005848404095 1.e-3 100000 } -@test "HBO" { # 13.3144s +@test "HBO" { # 18s [[ -n $TRAVIS ]] && skip qp set_file hbo.ezfio - run -100.213 1.5e-3 100000 + run -100.214 1.5e-3 100000 } -@test "H2O" { # 11.3727s +@test "H2O" { # 16s [[ -n $TRAVIS ]] && skip qp set_file h2o.ezfio - run -76.2361605151999 5.e-4 100000 + run -76.238051555276 5.e-4 100000 } -@test "ClO" { # 13.3755s +@test "ClO" { # 47s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio - run -534.546453546852 1.e-3 100000 + run -534.548529710256 1.e-3 100000 } -@test "SO" { # 13.4952s +@test "SO" { # 23s [[ -n $TRAVIS ]] && skip qp set_file so.ezfio run -26.015 3.e-3 100000 } -@test "H2S" { # 13.6745s +@test "H2S" { # 37s [[ -n $TRAVIS ]] && skip qp set_file h2s.ezfio - run -398.859577605891 5.e-4 100000 + run -398.864853669111 5.e-4 100000 } -@test "OH" { # 13.865s +@test "OH" { # 12s [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio - run -75.6121856748294 3.e-4 100000 + run -75.615 1.5e-3 100000 } -@test "SiH2_3B1" { # 13.938ss +@test "SiH2_3B1" { # 10s [[ -n $TRAVIS ]] && skip qp set_file sih2_3b1.ezfio - run -290.0175411299477 3.e-4 100000 + run -290.0206626734517 3.e-4 100000 } -@test "H3COH" { # 14.7299s +@test "H3COH" { # 33s [[ -n $TRAVIS ]] && skip qp set_file h3coh.ezfio - run -115.205632960026 1.e-3 100000 + run -115.206784386204 1.e-3 100000 } -@test "SiH3" { # 15.99s +@test "SiH3" { # 15s [[ -n $TRAVIS ]] && skip qp set_file sih3.ezfio run -5.572 1.e-3 100000 @@ -132,7 +167,7 @@ function run_stoch() { @test "ClF" { # 16.8864s [[ -n $TRAVIS ]] && skip qp set_file clf.ezfio - run -559.169748890031 1.5e-3 100000 + run -559.174371468224 1.5e-3 100000 } @test "SO2" { # 17.5645s @@ -170,7 +205,6 @@ function run_stoch() { run -187.970184372047 1.6e-3 100000 } - @test "[Cu(NH3)4]2+" { # 25.0417s [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio @@ -185,3 +219,10 @@ function run_stoch() { run -93.078 2.e-3 100000 } +@test "F2" { # 4.07m + [[ -n $TRAVIS ]] && skip + qp set_file f2.ezfio + qp set_frozen_core + run_stoch -199.304922384814 3.e-3 100000 +} + diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index df566032..3647b775 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -48,6 +48,35 @@ good=-92.76613324421798 rm -rf $EZFIO } + +@test "H2_1" { # 1s + run h2_1.ezfio -1.005924963288527 +} + +@test "H2_3" { # 1s + run h2_3.ezfio -0.9591011604845440 +} + +@test "H3_2" { # 1s + run h3_2.ezfio -1.558273529860488 +} + +@test "H3_4" { # 1s + run h3_4.ezfio -1.0158684760025190 +} + +@test "H4_1" { # 1s + run h4_1.ezfio -1.932022805374405 +} + +@test "H4_3" { # 1s + run h4_3.ezfio -1.8948449927787350 +} + +@test "H4_5" { # 1s + run h4_5.ezfio -1.2408338805496990 +} + @test "point charges" { run_pt_charges } diff --git a/src/tools/print_wf.irp.f b/src/tools/print_wf.irp.f index 64eb1a1f..9621ee89 100644 --- a/src/tools/print_wf.irp.f +++ b/src/tools/print_wf.irp.f @@ -47,6 +47,7 @@ subroutine routine do i = 1, min(N_det_print_wf,N_det) print*,'' print*,'i = ',i + print *,psi_det_sorted(1,1,i) call debug_det(psi_det_sorted(1,1,i),N_int) call get_excitation_degree(psi_det_sorted(1,1,i),psi_det_sorted(1,1,1),degree,N_int) print*,'degree = ',degree diff --git a/tests/input/h2_1.xyz b/tests/input/h2_1.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_1.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h2_3.xyz b/tests/input/h2_3.xyz new file mode 100644 index 00000000..8ecd7dab --- /dev/null +++ b/tests/input/h2_3.xyz @@ -0,0 +1,6 @@ +2 +H2 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 + + diff --git a/tests/input/h3_2.xyz b/tests/input/h3_2.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_2.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h3_4.xyz b/tests/input/h3_4.xyz new file mode 100644 index 00000000..7c251c35 --- /dev/null +++ b/tests/input/h3_4.xyz @@ -0,0 +1,7 @@ +3 +h3 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.0 0.0 + + diff --git a/tests/input/h4_1.xyz b/tests/input/h4_1.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_1.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_3.xyz b/tests/input/h4_3.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_3.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + diff --git a/tests/input/h4_5.xyz b/tests/input/h4_5.xyz new file mode 100644 index 00000000..fe163388 --- /dev/null +++ b/tests/input/h4_5.xyz @@ -0,0 +1,7 @@ +4 +h4 +H 0.0 0.0 -0.74 +H 0.0 0.0 0.74 +H 0.0 0.74 0.0 +H 0.0 0.0 0.0 + From 93adc8d6c1316f95151fe7ab32829db657cbd34c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 11:57:28 +0200 Subject: [PATCH 170/337] no aaa // --- .../normal_ordered_contractions.irp.f | 539 +++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 40 ++ 2 files changed, 560 insertions(+), 19 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_contractions.irp.f b/src/tc_bi_ortho/normal_ordered_contractions.irp.f index f066c958..d11c6727 100644 --- a/src/tc_bi_ortho/normal_ordered_contractions.irp.f +++ b/src/tc_bi_ortho/normal_ordered_contractions.irp.f @@ -500,7 +500,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] +BEGIN_PROVIDER [ double precision, no_aaa_contraction_v0, (mo_num,mo_num,mo_num,mo_num)] BEGIN_DOC ! @@ -508,12 +508,12 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ ! h1 < h2 ! p1 > p2 ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h1,p1,h2) + Ibeta(p2,h1,p1,h2)] ! = -0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! else: ! - ! no_aaa_contraction(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] + ! no_aaa_contraction_v0(p2,h2.p1,h1) = 0.5 [Ialpha(p2,h2,p1,h1) + Ibeta(p2,h2,p1,h1)] ! ! ! I(p2,h2,p1,h1) = J(p2,h2,p1,h1) - J(p1,h2,p2,h1) @@ -536,7 +536,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) - print*,' Providing no_aaa_contraction ...' + print*,' Providing no_aaa_contraction_v0 ...' call wall_time(wall0) PROVIDE N_int @@ -556,7 +556,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ if(Ne(2) .lt. 3) then - no_aaa_contraction = 0.d0 + no_aaa_contraction_v0 = 0.d0 else @@ -630,7 +630,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -662,7 +662,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -736,7 +736,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -776,7 +776,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -849,7 +849,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) enddo enddo enddo @@ -881,7 +881,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) enddo enddo enddo @@ -955,7 +955,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -995,7 +995,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ !$OMP PARALLEL DO PRIVATE(h2,p2) do h2 = 1, mo_num do p2 = 1, mo_num - no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + no_aaa_contraction_v0(p2,h2,p1,h1) = no_aaa_contraction_v0(p2,h2,p1,h1) + tmp_2d(p2,h2) enddo enddo !$OMP END PARALLEL DO @@ -1010,19 +1010,19 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2, tmpvec_3) - no_aaa_contraction = -0.5d0 * no_aaa_contraction + no_aaa_contraction_v0 = -0.5d0 * no_aaa_contraction_v0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (no_aaa_contraction, mo_num) + !$OMP SHARED (no_aaa_contraction_v0, mo_num) !$OMP DO do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num do p2 = p1, mo_num - no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) -= no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1034,7 +1034,7 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = 1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + no_aaa_contraction_v0(p2,h2,p1,h1) = -no_aaa_contraction_v0(p1,h2,p2,h1) enddo enddo enddo @@ -1046,17 +1046,18 @@ BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_ do h2 = h1+1, mo_num do p1 = 2, mo_num do p2 = 1, p1-1 - no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + no_aaa_contraction_v0(p2,h2,p1,h1) *= -1.d0 enddo enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL endif call wall_time(wall1) - print*,' Wall time for no_aaa_contraction', wall1-wall0 + print*,' Wall time for no_aaa_contraction_v0', wall1-wall0 END_PROVIDER @@ -1519,3 +1520,503 @@ BEGIN_PROVIDER [ double precision, no_aab_contraction, (mo_num,mo_num,mo_num,mo_ END_PROVIDER ! --- + +BEGIN_PROVIDER [ double precision, no_aaa_contraction, (mo_num,mo_num,mo_num,mo_num)] + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: Ne(2) + double precision :: wall0, wall1 + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + + print*,' Providing no_aaa_contraction ...' + call wall_time(wall0) + + PROVIDE N_int + + allocate(occ(N_int*bit_kind_size,2)) + allocate(key_i_core(N_int,2)) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + if(Ne(2) .lt. 3) then + + no_aaa_contraction = 0.d0 + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP no_aaa_contraction) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + no_aaa_contraction(p2,h2,p1,h1) = no_aaa_contraction(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + no_aaa_contraction = -0.5d0 * no_aaa_contraction + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (no_aaa_contraction, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + no_aaa_contraction(p2,h2,p1,h1) -= no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) = -no_aaa_contraction(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + no_aaa_contraction(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(wall1) + print*,' Wall time for no_aaa_contraction', wall1-wall0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 4f190407..4404bc02 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -21,6 +21,7 @@ program tc_bi_ortho !call test_no() call test_no_aba() call test_no_aab() + call test_no_aaa() end subroutine test_h_u0 @@ -382,4 +383,43 @@ end ! --- +subroutine test_no_aaa() + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr + + print*, ' testing no_aaa_contraction ...' + + thr = 1d-8 + + PROVIDE no_aaa_contraction_v0 + PROVIDE no_aaa_contraction + + accu = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_aaa_contraction (l,k,j,i) + ref = no_aaa_contraction_v0(l,k,j,i) + contrib = dabs(new - ref) + accu += contrib + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + enddo + enddo + enddo + enddo + print*, ' accu on no_aaa_contraction = ', accu / dble(mo_num)**4 + + return +end + +! --- From 373c46303337fcaaea795425da5ea1cd53364c02 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 10 Jun 2023 18:09:20 +0200 Subject: [PATCH 171/337] Normal Ordering: Enhanced // --- src/tc_bi_ortho/normal_ordered.irp.f | 954 ++++++++++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 8 +- 2 files changed, 951 insertions(+), 11 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 7259c270..ca5875c9 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! Normal ordering of the three body interaction on the HF density @@ -18,13 +18,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) - print*,' Providing normal_two_body_bi_orth ...' + print*,' Providing normal_two_body_bi_orth_v0 ...' call wall_time(walli) if(read_tc_norm_ord) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") - read(11) normal_two_body_bi_orth + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="read") + read(11) normal_two_body_bi_orth_v0 close(11) else @@ -318,7 +318,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ call wall_time(wall1) print*,' Wall time for aba_contraction', wall1-wall0 - normal_two_body_bi_orth = tmp + normal_two_body_bi_orth_v0 = tmp ! --- ! aab contraction @@ -491,12 +491,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL call wall_time(wall1) print*,' Wall time for aab_contraction', wall1-wall0 - normal_two_body_bi_orth += tmp + normal_two_body_bi_orth_v0 += tmp ! --- ! aaa contraction @@ -1002,9 +1003,948 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo enddo + !$OMP END DO !$OMP END PARALLEL - call wall_time(wallf) + call wall_time(wall1) + print*,' Wall time for aaa_contraction', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_v0 + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! Normal ordering of the three body interaction on the HF density + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: wall0, wall1, walli, wallf + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + print*,' Providing normal_two_body_bi_orth ...' + call wall_time(walli) + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") + read(11) normal_two_body_bi_orth + close(11) + + else + + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + + PROVIDE N_int + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + ! --- + ! aba contraction + + print*,' Providing aba_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction', wall1-wall0 + + normal_two_body_bi_orth = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction', wall1-wall0 + + normal_two_body_bi_orth += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction ...' + call wall_time(wall0) + + tmp = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + do ii = 1, Ne(2) + i = occ(ii,2) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo ! i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do h1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + !$OMP END CRITICAL + enddo + enddo + enddo + + do p1 = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + do p2 = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + !$OMP CRITICAL + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + !$OMP END CRITICAL + enddo + enddo + + enddo ! p1 + enddo ! h1 + enddo !i + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) print*,' Wall time for aaa_contraction', wall1-wall0 normal_two_body_bi_orth += tmp diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 4404bc02..902f7295 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -18,10 +18,10 @@ program tc_bi_ortho ! call timing_single ! call timing_double - !call test_no() - call test_no_aba() - call test_no_aab() - call test_no_aaa() + call test_no() + !call test_no_aba() + !call test_no_aab() + !call test_no_aaa() end subroutine test_h_u0 From 24f91e9bec8b255e9e790cb977d37c2d86877ce0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 11 Jun 2023 11:41:48 +0200 Subject: [PATCH 172/337] Choose a port number based on PID --- ocaml/qp_run.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/qp_run.ml b/ocaml/qp_run.ml index b9d14efe..0cb862ae 100644 --- a/ocaml/qp_run.ml +++ b/ocaml/qp_run.ml @@ -38,7 +38,8 @@ let run slave ?prefix exe ezfio_file = | Unix.Unix_error _ -> try_new_port (port_number+100) in let result = - try_new_port 41279 + let port = 10*(Unix.getpid () mod 2823) + 32_769 in + try_new_port port in Zmq.Socket.close dummy_socket; Zmq.Context.terminate zmq_context; From 2f6c7e4ba00a7fb9a2dc6f1cabba87e8f036211f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 11 Jun 2023 12:19:39 +0200 Subject: [PATCH 173/337] Update test in FCI --- src/fci/40.fci.bats | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fci/40.fci.bats b/src/fci/40.fci.bats index 3c4a93c7..889bf90a 100644 --- a/src/fci/40.fci.bats +++ b/src/fci/40.fci.bats @@ -209,7 +209,7 @@ function run_stoch() { [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio qp set_mo_class --core="[1-24]" --act="[25-45]" --del="[46-87]" - run -1862.9869374387192 3.e-04 100000 + run -1862.98320066637 3.e-04 100000 } @test "HCN" { # 20.3273s From bb23d6a5b5160387ef0695d1a07e7f4ef86f71b6 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 12 Jun 2023 13:36:01 +0200 Subject: [PATCH 174/337] Fixed the pt_charges bug: + added the pt_charges integrals to the usual v_ne + added only the nuclei_pt_charge interaction to the usual nuclear_repulsion (and not the pt_charge_pt_charge interaction) --- src/ao_one_e_ints/pot_ao_ints.irp.f | 3 +++ src/hartree_fock/10.hf.bats | 5 +++-- src/nuclei/nuclei.irp.f | 7 ++++++- src/nuclei/point_charges.irp.f | 3 +++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 446bf730..4f9ae76d 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -104,6 +104,9 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF + IF(point_charges) THEN + ao_integrals_n_e += ao_integrals_pt_chrg + ENDIF endif diff --git a/src/hartree_fock/10.hf.bats b/src/hartree_fock/10.hf.bats index 3647b775..6e7d0233 100644 --- a/src/hartree_fock/10.hf.bats +++ b/src/hartree_fock/10.hf.bats @@ -43,12 +43,11 @@ python write_pt_charges.py ${EZFIO} qp set nuclei point_charges True qp run scf | tee ${EZFIO}.pt_charges.out energy="$(ezfio get hartree_fock energy)" -good=-92.76613324421798 +good=-92.79920682236470 eq $energy $good $thresh rm -rf $EZFIO } - @test "H2_1" { # 1s run h2_1.ezfio -1.005924963288527 } @@ -85,6 +84,8 @@ rm -rf $EZFIO run hcn.ezfio -92.88717500035233 } + + @test "B-B" { # 3s run b2_stretched.ezfio -48.9950585434279 } diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f index fabdc42e..bb8cc782 100644 --- a/src/nuclei/nuclei.irp.f +++ b/src/nuclei/nuclei.irp.f @@ -206,7 +206,12 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ] enddo nuclear_repulsion *= 0.5d0 if(point_charges)then - nuclear_repulsion += pt_chrg_nuclei_interaction + pt_chrg_interaction + print*,'bear nuclear repulsion = ',nuclear_repulsion + print*,'adding the interaction between the nuclein and the point charges' + print*,'to the usual nuclear repulsion ' + nuclear_repulsion += pt_chrg_nuclei_interaction + print*,'new nuclear repulsion = ',nuclear_repulsion + print*,'WARNING: we do not add the interaction between the point charges themselves' endif end if diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f index b955537f..66905c8c 100644 --- a/src/nuclei/point_charges.irp.f +++ b/src/nuclei/point_charges.irp.f @@ -205,5 +205,8 @@ BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_interaction] enddo print*,'Interaction between point charges and nuclei' print*,'pt_chrg_nuclei_interaction = ',pt_chrg_nuclei_interaction + if(point_charges)then + provide pt_chrg_interaction + endif END_PROVIDER From 4d9e28438c199c7f8956913b7380c5ba6ec07932 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 12 Jun 2023 14:05:36 +0200 Subject: [PATCH 175/337] Improved I/O in CCSD --- src/ao_two_e_ints/cholesky.irp.f | 13 ++---- src/ccsd/EZFIO.cfg | 11 +++++ src/ccsd/ccsd_space_orb_sub.irp.f | 11 +++-- src/ccsd/ccsd_spin_orb_sub.irp.f | 14 +++--- src/ccsd/save_energy.irp.f | 13 ++++++ src/mo_two_e_ints/cholesky.irp.f | 2 + src/utils/linear_algebra.irp.f | 6 +-- src/utils_cc/EZFIO.cfg | 16 +++---- src/utils_cc/guess_t.irp.f | 75 +++++++++++++++---------------- 9 files changed, 88 insertions(+), 73 deletions(-) create mode 100644 src/ccsd/EZFIO.cfg create mode 100644 src/ccsd/save_energy.irp.f diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index bb81b141..77eb6ddc 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -4,7 +4,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num / 2 + cholesky_ao_num_guess = ao_num*ao_num END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -44,19 +44,12 @@ END_PROVIDER do m=0,9 do l=1+m,ao_num,10 !$OMP DO SCHEDULE(dynamic) - do j=1,l + do j=1,ao_num do k=1,ao_num - do i=1,min(k,j) + do i=1,ao_num if (ao_two_e_integral_zero(i,j,k,l)) cycle integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral enddo enddo enddo diff --git a/src/ccsd/EZFIO.cfg b/src/ccsd/EZFIO.cfg new file mode 100644 index 00000000..328cd981 --- /dev/null +++ b/src/ccsd/EZFIO.cfg @@ -0,0 +1,11 @@ +[energy] +type: double precision +doc: CCSD energy +interface: ezfio + +[energy_t] +type: double precision +doc: CCSD(T) energy +interface: ezfio + + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 1467d9a4..40c57188 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -135,8 +135,11 @@ subroutine run_ccsd_space_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif ! Deallocation if (cc_update_method == 'diis') then @@ -147,6 +150,7 @@ subroutine run_ccsd_space_orb ! CCSD(T) double precision :: e_t + e_t = 0.d0 if (cc_par_t .and. elec_alpha_num + elec_beta_num > 2) then @@ -182,8 +186,7 @@ subroutine run_ccsd_space_orb print*,'' endif - print*,'Reference determinant:' - call print_det(det,N_int) + call save_energy(uncorr_energy + energy, e_t) deallocate(t1,t2) diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index 23e2cef1..a267cc45 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -269,8 +269,11 @@ subroutine run_ccsd_spin_orb write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r print*,'' - call write_t1(nO,nV,t1) - call write_t2(nO,nV,t2) + if (write_amplitudes) then + call write_t1(nO,nV,t1) + call write_t2(nO,nV,t2) + call ezfio_set_utils_cc_io_amplitudes('Read') + endif ! Deallocate if (cc_update_method == 'diis') then @@ -284,8 +287,9 @@ subroutine run_ccsd_spin_orb deallocate(v_ovoo,v_oovo) deallocate(v_ovvo,v_ovov,v_oovv) + double precision :: t_corr + t_corr = 0.d0 if (cc_par_t .and. elec_alpha_num +elec_beta_num > 2) then - double precision :: t_corr print*,'CCSD(T) calculation...' call wall_time(ta) !allocate(v_vvvo(nV,nV,nV,nO)) @@ -307,8 +311,8 @@ subroutine run_ccsd_spin_orb write(*,'(A15,F18.12,A3)') ' Correlation = ', energy + t_corr, ' Ha' print*,'' endif - print*,'Reference determinant:' - call print_det(det,N_int) + + call save_energy(uncorr_energy + energy, t_corr) deallocate(f_oo,f_ov,f_vv,f_o,f_v) deallocate(v_ooov,v_vvoo,t1,t2) diff --git a/src/ccsd/save_energy.irp.f b/src/ccsd/save_energy.irp.f new file mode 100644 index 00000000..30d93ec3 --- /dev/null +++ b/src/ccsd/save_energy.irp.f @@ -0,0 +1,13 @@ +subroutine save_energy(E,ET) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E, ET + call ezfio_set_ccsd_energy(E) + if (ET /= 0.d0) then + call ezfio_set_ccsd_energy_t(E+ET) + endif +end + + diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 8b1e6e1c..32c0dccd 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -27,6 +27,8 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, double precision, allocatable :: buffer(:,:) print *, 'AO->MO Transformation of Cholesky vectors .' + + call set_multiple_levels_omp(.False.) !$OMP PARALLEL PRIVATE(i,j,k,buffer) allocate(buffer(mo_num,mo_num)) !$OMP DO SCHEDULE(static) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 69873bc0..76a539a6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1831,7 +1831,7 @@ double precision, intent(in) :: tol integer, dimension(:), allocatable :: piv double precision, dimension(:), allocatable :: work -character, parameter :: uplo = "U" +character, parameter :: uplo = 'L' integer :: LDA integer :: info integer :: k, l, rank0 @@ -1848,14 +1848,14 @@ if (rank > rank0) then end if do k = 1, ndim - A(k+1:ndim, k) = 0.00D+0 + A(k,k+1:ndim) = 0.00D+0 end do ! TODO: It should be possible to use only one vector of size (1:rank) as a buffer ! to do the swapping in-place U(:,:) = 0.00D+0 do k = 1, ndim l = piv(k) - U(l, 1:rank) = A(1:rank, k) + U(l, 1:rank) = A(k,1:rank) end do end subroutine pivoted_cholesky diff --git a/src/utils_cc/EZFIO.cfg b/src/utils_cc/EZFIO.cfg index 71ee87e3..fb6d9034 100644 --- a/src/utils_cc/EZFIO.cfg +++ b/src/utils_cc/EZFIO.cfg @@ -46,17 +46,11 @@ doc: Guess used to initialize the T2 amplitudes. none -> 0, MP -> perturbation t interface: ezfio,ocaml,provider default: MP -[cc_write_t1] -type: logical -doc: If true, it will write on disk the T1 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False - -[cc_write_t2] -type: logical -doc: If true, it will write on disk the T2 amplitudes at the end of the calculation. -interface: ezfio,ocaml,provider -default: False +[io_amplitudes] +type: Disk_access +doc: Read/Write |CCSD| amplitudes from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None [cc_par_t] type: logical diff --git a/src/utils_cc/guess_t.irp.f b/src/utils_cc/guess_t.irp.f index 42acdf78..bb26e133 100644 --- a/src/utils_cc/guess_t.irp.f +++ b/src/utils_cc/guess_t.irp.f @@ -91,16 +91,17 @@ subroutine write_t1(nO,nV,t1) double precision, intent(in) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit + integer, external :: getunitandopen - if (cc_write_t1) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','w') do a = 1, nV do i = 1, nO - write(11,'(F20.12)') t1(i,a) + write(iunit,'(F20.12)') t1(i,a) enddo enddo - close(11) + close(iunit) endif end @@ -120,20 +121,21 @@ subroutine write_t2(nO,nV,t2) double precision, intent(in) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit + integer, external :: getunitandopen - if (cc_write_t2) then - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') + if (write_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','w') do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO - write(11,'(F20.12)') t2(i,j,a,b) + write(iunit,'(F20.12)') t2(i,j,a,b) enddo enddo enddo enddo - close(11) + close(iunit) endif end @@ -153,23 +155,19 @@ subroutine read_t1(nO,nV,t1) double precision, intent(out) :: t1(nO, nV) ! internal - integer :: i,a + integer :: i,a, iunit logical :: ok + integer, external :: getunitandopen - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t1 = True' - print*, 'and cc_guess_t1 /= read before setting cc_guess_t1 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T1') - do a = 1, nV - do i = 1, nO - read(11,'(F20.12)') t1(i,a) + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T1','r') + do a = 1, nV + do i = 1, nO + read(iunit,'(F20.12)') t1(i,a) + enddo enddo - enddo - close(11) + close(iunit) + endif end @@ -188,26 +186,23 @@ subroutine read_t2(nO,nV,t2) double precision, intent(out) :: t2(nO, nO, nV, nV) ! internal - integer :: i,j,a,b + integer :: i,j,a,b, iunit logical :: ok - inquire(file=trim(ezfio_filename)//'/cc_utils/T1', exist=ok) - if (.not. ok) then - print*, 'There is no file'// trim(ezfio_filename)//'/cc_utils/T1' - print*, 'Do a first calculation with cc_write_t2 = True' - print*, 'and cc_guess_t2 /= read before setting cc_guess_t2 = read' - call abort - endif - open(unit=11, file=trim(ezfio_filename)//'/cc_utils/T2') - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - read(11,'(F20.12)') t2(i,j,a,b) + integer, external :: getunitandopen + + if (read_amplitudes) then + iunit = getUnitAndOpen(trim(ezfio_filename)//'/work/T2','r') + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + read(iunit,'(F20.12)') t2(i,j,a,b) + enddo enddo enddo enddo - enddo - close(11) + close(iunit) + endif end From 2f246780eb0a08ee87ab5d98fe9c0e2f17685594 Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 13 Jun 2023 14:05:13 +0200 Subject: [PATCH 176/337] fix bug in get_excitation_general --- src/utils_cc/org/phase.org | 2 ++ src/utils_cc/phase.irp.f | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/utils_cc/org/phase.org b/src/utils_cc/org/phase.org index 5f67859c..2156a251 100644 --- a/src/utils_cc/org/phase.org +++ b/src/utils_cc/org/phase.org @@ -137,6 +137,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -147,6 +148,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 diff --git a/src/utils_cc/phase.irp.f b/src/utils_cc/phase.irp.f index 01b41f49..e0703fb8 100644 --- a/src/utils_cc/phase.irp.f +++ b/src/utils_cc/phase.irp.f @@ -96,6 +96,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n1(j) + if (k > n_anni(j)) exit if (l1(i,j) /= list_anni(k,j)) cycle pos_anni(k,j) = i k = k + 1 @@ -106,6 +107,7 @@ subroutine get_excitation_general(det1,det2,degree,n,list_anni,list_crea,phase,N do j = 1, 2 k = 1 do i = 1, n2(j) + if (k > n_crea(j)) exit if (l2(i,j) /= list_crea(k,j)) cycle pos_crea(k,j) = i k = k + 1 From a56644a808e0aea4c16d72c61b717ac4b2e0cabc Mon Sep 17 00:00:00 2001 From: ydamour Date: Tue, 13 Jun 2023 14:24:39 +0200 Subject: [PATCH 177/337] remove old stuffs --- src/mo_optimization/my_providers.irp.f | 141 ------------------------- 1 file changed, 141 deletions(-) delete mode 100644 src/mo_optimization/my_providers.irp.f diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f deleted file mode 100644 index 7469ffd5..00000000 --- a/src/mo_optimization/my_providers.irp.f +++ /dev/null @@ -1,141 +0,0 @@ -! Dimensions of MOs - - -BEGIN_PROVIDER [ integer, n_mo_dim ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of MOs we can build, - ! with i>j - END_DOC - - n_mo_dim = mo_num*(mo_num-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_core ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of core MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_act ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of active MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_inact ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of inactive MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 - -END_PROVIDER - -BEGIN_PROVIDER [ integer, n_mo_dim_virt ] - implicit none - BEGIN_DOC - ! Number of different pairs (i,j) of virtual MOs we can build, - ! with i>j - END_DOC - - n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 - -END_PROVIDER - -! Energies/criterions - -BEGIN_PROVIDER [ double precision, my_st_av_energy ] - implicit none - BEGIN_DOC - ! State average CI energy - END_DOC - - !call update_st_av_ci_energy(my_st_av_energy) - call state_average_energy(my_st_av_energy) - -END_PROVIDER - -! With all the MOs - -BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] -&BEGIN_PROVIDER [ double precision, my_CC1_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map - - call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, for all the MOs. - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(mo_num, mo_num, mo_num, mo_num)) - - call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) - -END_PROVIDER - -! With the list of active MOs -! Can be generalized to any mo_class by changing the list/dimension - -BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] -&BEGIN_PROVIDER [ double precision, my_CC2_opt ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision :: norm_grad - - PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals - - call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) - -END_PROVIDER - -BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] - implicit none - BEGIN_DOC - ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! - ! - Maximal element of the gradient in absolute value - END_DOC - - double precision, allocatable :: h_f(:,:,:,:) - - PROVIDE mo_two_e_integrals_in_map - - allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) - - call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) - -END_PROVIDER From a4834d0acee2b18820a9efea2a090e6c9e804c33 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 13 Jun 2023 16:01:05 +0200 Subject: [PATCH 178/337] Allow merge with master --- src/davidson_keywords/usef.irp.f | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f index fed2ba9b..7ca2d203 100644 --- a/src/davidson_keywords/usef.irp.f +++ b/src/davidson_keywords/usef.irp.f @@ -13,7 +13,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io read(env,*) nthreads_davidson + call unlock_io call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER From 88f168724e65038b4d9ce9d4a566252de62f1fb5 Mon Sep 17 00:00:00 2001 From: ydamour Date: Thu, 15 Jun 2023 14:46:17 +0200 Subject: [PATCH 179/337] fix binary search (T) --- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 82 +++++++++++++++------------ 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index b669025e..31fe67ce 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -104,17 +104,17 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ integer*8, allocatable :: sampled(:) ! integer(omp_lock_kind), allocatable :: lock(:) integer*2 , allocatable :: abc(:,:) - integer*8 :: Nabc, i8 + integer*8 :: Nabc, i8,kiter integer*8, allocatable :: iorder(:) double precision :: eocc double precision :: norm - integer :: kiter, isample + integer :: isample ! Prepare table of triplets (a,b,c) Nabc = (int(nV,8) * int(nV+1,8) * int(nV+2,8))/6_8 - nV - allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(Nabc)) + allocate (memo(Nabc), sampled(Nabc), Pabc(Nabc), waccu(0:Nabc)) allocate (abc(4,Nabc), iorder(Nabc)) !, lock(Nabc)) ! eocc = 3.d0/dble(nO) * sum(f_o(1:nO)) @@ -124,21 +124,21 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ do c = b+1, nV Nabc = Nabc + 1_8 Pabc(Nabc) = -1.d0/(f_v(a) + f_v(b) + f_v(c)) - abc(1,Nabc) = a - abc(2,Nabc) = b - abc(3,Nabc) = c + abc(1,Nabc) = int(a,2) + abc(2,Nabc) = int(b,2) + abc(3,Nabc) = int(c,2) enddo Nabc = Nabc + 1_8 - abc(1,Nabc) = a - abc(2,Nabc) = b - abc(3,Nabc) = a + abc(1,Nabc) = int(a,2) + abc(2,Nabc) = int(b,2) + abc(3,Nabc) = int(a,2) Pabc(Nabc) = -1.d0/(2.d0*f_v(a) + f_v(b)) Nabc = Nabc + 1_8 - abc(1,Nabc) = b - abc(2,Nabc) = a - abc(3,Nabc) = b + abc(1,Nabc) = int(b,2) + abc(2,Nabc) = int(a,2) + abc(3,Nabc) = int(b,2) Pabc(Nabc) = -1.d0/(f_v(a) + 2.d0*f_v(b)) enddo enddo @@ -169,6 +169,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ waccu(i8) = waccu(i8+1) - Pabc(i8+1) enddo waccu(:) = waccu(:) + 1.d0 + waccu(0) = 0.d0 logical :: converged, do_comp double precision :: eta, variance, error, sample @@ -222,8 +223,12 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ do kiter=1,Nabc !$OMP MASTER - do while ((imin <= Nabc).and.(sampled(imin)>-1_8)) - imin = imin+1 + do while (imin <= Nabc) + if (sampled(imin)>-1_8) then + imin = imin+1 + else + exit + endif enddo ! Deterministic part @@ -301,6 +306,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ endif enddo + isample = min(isample,nbuckets) do ieta=bounds(1,isample), Nabc w = dble(max(sampled(ieta),0_8)) tmp = w * memo(ieta) * Pabc(ieta) @@ -331,33 +337,39 @@ end -integer*8 function binary_search(arr, key, size) +integer*8 function binary_search(arr, key, sze) implicit none BEGIN_DOC -! Searches the key in array arr(1:size) between l_in and r_in, and returns its index +! Searches the key in array arr(1:sze) between l_in and r_in, and returns its index END_DOC - integer*8 :: size, i, j, mid, l_in, r_in - double precision, dimension(size) :: arr(1:size) + integer*8 :: sze, i, j, mid + double precision :: arr(0:sze) double precision :: key - i = 1_8 - j = size + if ( key < arr(1) ) then + binary_search = 0_8 + return + end if - do while (j >= i) - mid = i + (j - i) / 2 - if (arr(mid) >= key) then - if (mid > 1 .and. arr(mid - 1) < key) then - binary_search = mid - return - end if - j = mid - 1 - else if (arr(mid) < key) then - i = mid + 1 - else - binary_search = mid + 1 - return - end if + if ( key >= arr(sze) ) then + binary_search = sze + return + end if + + i = 0_8 + j = sze + 1_8 + + do while (.True.) + mid = (i + j) / 2_8 + if ( key >= arr(mid) ) then + i = mid + else + j = mid + end if + if (j-i <= 1_8) then + binary_search = i + return + endif end do - binary_search = i end function binary_search From 71f6163c40d70b4f35bd65f221f4da7b370149df Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 18 Jun 2023 20:28:48 +0200 Subject: [PATCH 180/337] added some comments for normal ordering old --- src/tc_bi_ortho/normal_ordered_old.irp.f | 10 +++++- src/tc_bi_ortho/test_normal_order.irp.f | 43 ++++++++++++++++++++---- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 417580dd..6ee21a14 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -120,6 +120,13 @@ END_PROVIDER subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC +! give the contribution for a double excitation of opposite spin BUT averaged over spin +! +! it is the average of and +! +! because the orbitals h1,h2,p1,p2 are spatial orbitals and therefore can be of different spins + END_DOC implicit none integer, intent(in) :: Nint, h1, h2, p1, p2 @@ -158,7 +165,8 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) ! spin average +! hthree += 1.d0 * int_direct - 1.0d0 * (int_exc_13 + int_exc_12) enddo return diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index cb0c355c..ac84dbc6 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -20,7 +20,7 @@ subroutine test integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) integer :: exc(0:2,2,2) integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal + double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp integer, allocatable :: occ(:,:) allocate( occ(N_int*bit_kind_size,2) ) call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) @@ -32,15 +32,44 @@ subroutine test do p1 = elec_alpha_num+1, mo_num do h2 = 1, elec_beta_num do p2 = elec_beta_num+1, mo_num + hthree = 0.d0 + det_i = ref_bitmask + s1 = 1 + s2 = 2 call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree *= phase -! !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + det_i = ref_bitmask + s1 = 2 + s2 = 1 + call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle + call do_single_excitation(det_i,h2,p2,s2,i_ok) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call get_excitation_degree(ref_bitmask,det_i,degree,N_int) + call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + + +! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) + if(dabs(hthree).lt.1.d-10)cycle + if(dabs(hthree-normal).gt.1.d-10)then +! print*,pp2,pp1,hh2,hh1 + print*,p2,p1,h2,h1 + print*,hthree,normal,dabs(hthree-normal) + stop + endif +! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) ! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) accu += dabs(hthree-normal) enddo @@ -73,8 +102,8 @@ do h1 = 1, elec_alpha_num integer :: hh1, pp1, hh2, pp2, ss1, ss2 call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) +! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) if(dabs(hthree).lt.1.d-10)cycle if(dabs(hthree-normal).gt.1.d-10)then print*,pp2,pp1,hh2,hh1 From b2e44beb3e11cacc9594a58da5c7ed4295506092 Mon Sep 17 00:00:00 2001 From: eginer Date: Sun, 18 Jun 2023 21:42:40 +0200 Subject: [PATCH 181/337] added casscf_cipsi --- src/casscf_cipsi/50.casscf.bats | 49 +++ src/casscf_cipsi/EZFIO.cfg | 75 ++++ src/casscf_cipsi/NEED | 5 + src/casscf_cipsi/README.rst | 5 + src/casscf_cipsi/bavard.irp.f | 6 + src/casscf_cipsi/bielec.irp.f | 155 +++++++ src/casscf_cipsi/bielec_natorb.irp.f | 369 +++++++++++++++++ src/casscf_cipsi/casscf.irp.f | 110 +++++ src/casscf_cipsi/class.irp.f | 12 + src/casscf_cipsi/dav_sx_mat.irp.f | 45 +++ src/casscf_cipsi/densities.irp.f | 67 +++ src/casscf_cipsi/densities_peter.irp.f | 150 +++++++ src/casscf_cipsi/det_manip.irp.f | 125 ++++++ src/casscf_cipsi/driver_optorb.irp.f | 3 + src/casscf_cipsi/get_energy.irp.f | 51 +++ src/casscf_cipsi/grad_old.irp.f | 74 ++++ src/casscf_cipsi/gradient.irp.f | 215 ++++++++++ src/casscf_cipsi/hessian.irp.f | 539 +++++++++++++++++++++++++ src/casscf_cipsi/hessian_old.irp.f | 310 ++++++++++++++ src/casscf_cipsi/mcscf_fock.irp.f | 80 ++++ src/casscf_cipsi/natorb.irp.f | 231 +++++++++++ src/casscf_cipsi/neworbs.irp.f | 253 ++++++++++++ src/casscf_cipsi/reorder_orb.irp.f | 70 ++++ src/casscf_cipsi/save_energy.irp.f | 9 + src/casscf_cipsi/superci_dm.irp.f | 207 ++++++++++ src/casscf_cipsi/swap_orb.irp.f | 132 ++++++ src/casscf_cipsi/tot_en.irp.f | 101 +++++ 27 files changed, 3448 insertions(+) create mode 100644 src/casscf_cipsi/50.casscf.bats create mode 100644 src/casscf_cipsi/EZFIO.cfg create mode 100644 src/casscf_cipsi/NEED create mode 100644 src/casscf_cipsi/README.rst create mode 100644 src/casscf_cipsi/bavard.irp.f create mode 100644 src/casscf_cipsi/bielec.irp.f create mode 100644 src/casscf_cipsi/bielec_natorb.irp.f create mode 100644 src/casscf_cipsi/casscf.irp.f create mode 100644 src/casscf_cipsi/class.irp.f create mode 100644 src/casscf_cipsi/dav_sx_mat.irp.f create mode 100644 src/casscf_cipsi/densities.irp.f create mode 100644 src/casscf_cipsi/densities_peter.irp.f create mode 100644 src/casscf_cipsi/det_manip.irp.f create mode 100644 src/casscf_cipsi/driver_optorb.irp.f create mode 100644 src/casscf_cipsi/get_energy.irp.f create mode 100644 src/casscf_cipsi/grad_old.irp.f create mode 100644 src/casscf_cipsi/gradient.irp.f create mode 100644 src/casscf_cipsi/hessian.irp.f create mode 100644 src/casscf_cipsi/hessian_old.irp.f create mode 100644 src/casscf_cipsi/mcscf_fock.irp.f create mode 100644 src/casscf_cipsi/natorb.irp.f create mode 100644 src/casscf_cipsi/neworbs.irp.f create mode 100644 src/casscf_cipsi/reorder_orb.irp.f create mode 100644 src/casscf_cipsi/save_energy.irp.f create mode 100644 src/casscf_cipsi/superci_dm.irp.f create mode 100644 src/casscf_cipsi/swap_orb.irp.f create mode 100644 src/casscf_cipsi/tot_en.irp.f diff --git a/src/casscf_cipsi/50.casscf.bats b/src/casscf_cipsi/50.casscf.bats new file mode 100644 index 00000000..a0db725d --- /dev/null +++ b/src/casscf_cipsi/50.casscf.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run_stoch() { + thresh=$2 + test_exe casscf || skip + qp set perturbation do_pt2 True + qp set determinants n_det_max $3 + qp set davidson threshold_davidson 1.e-10 + qp set davidson n_states_diag 4 + qp run casscf | tee casscf.out + energy1="$(ezfio get casscf energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" + eq $energy1 $1 $thresh +} + +@test "F2" { # 18.0198s + rm -rf f2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/f2.zmt -o f2_casscf + qp set_file f2_casscf + qp run scf + qp set_mo_class --core="[1-6,8-9]" --act="[7,10]" --virt="[11-46]" + run_stoch -198.773366970 1.e-4 100000 +} + +@test "N2" { # 18.0198s + rm -rf n2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/n2.xyz -o n2_casscf + qp set_file n2_casscf + qp run scf + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + run_stoch -109.0961643162 1.e-4 100000 +} + +@test "N2_stretched" { + rm -rf n2_stretched_casscf + qp_create_ezfio -b aug-cc-pvdz -m 7 ../input/n2_stretched.xyz -o n2_stretched_casscf + qp set_file n2_stretched_casscf + qp run scf | tee scf.out + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + qp set electrons elec_alpha_num 7 + qp set electrons elec_beta_num 7 + run_stoch -108.7860471300 1.e-4 100000 +# + +} + diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg new file mode 100644 index 00000000..2a1f1926 --- /dev/null +++ b/src/casscf_cipsi/EZFIO.cfg @@ -0,0 +1,75 @@ +[energy] +type: double precision +doc: Calculated Selected |FCI| energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated |FCI| energy + |PT2| +interface: ezfio +size: (determinants.n_states) + +[state_following_casscf] +type: logical +doc: If |true|, the CASSCF will try to follow the guess CI vector and orbitals +interface: ezfio,provider,ocaml +default: False + + +[diag_hess_cas] +type: logical +doc: If |true|, only the DIAGONAL part of the hessian is retained for the CASSCF +interface: ezfio,provider,ocaml +default: False + +[hess_cv_cv] +type: logical +doc: If |true|, the core-virtual - core-virtual part of the hessian is computed +interface: ezfio,provider,ocaml +default: True + + +[level_shift_casscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve SCF convergence +interface: ezfio,provider,ocaml +default: 0.005 + + +[fast_2rdm] +type: logical +doc: If true, the two-rdm are computed with a fast algo +interface: ezfio,provider,ocaml +default: True + +[criterion_casscf] +type: character*(32) +doc: choice of the criterion for the convergence of the casscf: can be energy or gradients or e_pt2 +interface: ezfio, provider, ocaml +default: e_pt2 + +[thresh_casscf] +type: Threshold +doc: Threshold on the convergence of the CASCF energy. +interface: ezfio,provider,ocaml +default: 1.e-06 + + +[pt2_min_casscf] +type: Threshold +doc: Minimum value of the pt2_max parameter for the CIPSI in the CASSCF iterations. +interface: ezfio,provider,ocaml +default: 1.e-04 + +[n_big_act_orb] +type: integer +doc: Number of active orbitals from which the active space is considered as large, and therefore pt2_min_casscf is activated. +interface: ezfio,provider,ocaml +default: 16 + +[adaptive_pt2_max] +type: logical +doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder +interface: ezfio,provider,ocaml +default: True diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED new file mode 100644 index 00000000..dd91c7bd --- /dev/null +++ b/src/casscf_cipsi/NEED @@ -0,0 +1,5 @@ +cipsi +selectors_full +generators_cas +two_body_rdm +dav_general_mat diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst new file mode 100644 index 00000000..08bfd95b --- /dev/null +++ b/src/casscf_cipsi/README.rst @@ -0,0 +1,5 @@ +====== +casscf +====== + +|CASSCF| program with the CIPSI algorithm. diff --git a/src/casscf_cipsi/bavard.irp.f b/src/casscf_cipsi/bavard.irp.f new file mode 100644 index 00000000..463c3ea4 --- /dev/null +++ b/src/casscf_cipsi/bavard.irp.f @@ -0,0 +1,6 @@ +! -*- F90 -*- +BEGIN_PROVIDER [logical, bavard] +! bavard=.true. + bavard=.false. +END_PROVIDER + diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f new file mode 100644 index 00000000..0a44f994 --- /dev/null +++ b/src/casscf_cipsi/bielec.irp.f @@ -0,0 +1,155 @@ +BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + real*8 :: mo_two_e_integral + + bielec_PQxx(:,:,:,:) = 0.d0 + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) + bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + end do + end do + !$OMP END DO + + + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + double precision, allocatable :: integrals_array(:,:) + real*8 :: mo_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + bielec_PxxQ = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + allocate(integrals_array(mo_num,mo_num)) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + end do + end do + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + + ! (ip|qj) + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(integrals_array) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,p,t,u,v + double precision, external :: mo_two_e_integral + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,p,t,u,v) & + !$OMP SHARED(mo_num,n_act_orb,list_act,bielecCI) + do p=1,mo_num + do j=1,n_act_orb + u=list_act(j) + do k=1,n_act_orb + v=list_act(k) + do i=1,n_act_orb + t=list_act(i) + bielecCI(i,k,j,p) = mo_two_e_integral(t,u,v,p) + end do + end do + end do + end do + !$OMP END PARALLEL DO + +END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f new file mode 100644 index 00000000..9968530c --- /dev/null +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -0,0 +1,369 @@ + BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! integral (pq|xx) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + end do + end do + + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + do j=1,mo_num + bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(mo_num,mo_num,n_act_orb),d(mo_num,mo_num,n_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate (f,d) + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! integral (px|xq) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + + + allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & + d(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)) + + !$OMP DO + do j=1,mo_num + bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_core_inact_act_orb**2,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do k=1,mo_num + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate(f,d) + + allocate(f(mo_num,n_core_inact_act_orb,n_act_orb), & + d(mo_num,n_core_inact_act_orb,n_act_orb) ) + + !$OMP DO + do k=1,mo_num + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + deallocate(f,d) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! integrals (tu|vp) in the basis of natural MOs + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielecCI_no,bielecCI,list_act,natorbsCI) + + allocate (f(n_act_orb,n_act_orb,mo_num), & + d(n_act_orb,n_act_orb,mo_num)) + + !$OMP DO + do l=1,mo_num + bielecCI_no(:,:,:,l) = bielecCI(:,:,:,l) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(p,j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + bielecCI_no(p,j,k,l)=d(p,j,k) + end do + end do + + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(j,p,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do p=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,p,k,l)=d(p,j,k) + end do + end do + end do + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,p,l) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO + do l=1,n_act_orb + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,l,list_act(p)) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,l,list_act(p))=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(d,f) + !$OMP END PARALLEL + + +END_PROVIDER + diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f new file mode 100644 index 00000000..a2f3c5a7 --- /dev/null +++ b/src/casscf_cipsi/casscf.irp.f @@ -0,0 +1,110 @@ +program casscf + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + call reorder_orbitals_for_casscf +! no_vvvv_integrals = .True. +! touch no_vvvv_integrals + n_det_max_full = 500 + touch n_det_max_full + pt2_relative_error = 0.04 + touch pt2_relative_error +! call run_stochastic_cipsi + call run +end + +subroutine run + implicit none + double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E + logical :: converged,state_following_casscf_save + integer :: iteration + converged = .False. + + energy = 0.d0 + mo_label = "MCSCF" + iteration = 1 + state_following_casscf_save = state_following_casscf + state_following_casscf = .True. + touch state_following_casscf + ept2_before = 0.d0 + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif + do while (.not.converged) + print*,'pt2_max = ',pt2_max + call run_stochastic_cipsi + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'CAS-SCF energy = ') + if(n_states == 1)then + double precision :: E_PT2, PT2 + call ezfio_get_casscf_energy_pt2(E_PT2) + call ezfio_get_casscf_energy(PT2) + PT2 -= E_PT2 + call write_double(6,E_PT2,'E + PT2 energy = ') + call write_double(6,PT2,' PT2 = ') + call write_double(6,pt2_max,' PT2_MAX = ') + endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = dabs(E_PT2 - ept2_before) + converged = dabs(delta_E) < thresh_casscf + endif + ept2_before = E_PT2 + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif + endif + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + call save_mos + if(.not.converged)then + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_save + soft_touch state_following_casscf + endif + endif + + enddo + +end + + diff --git a/src/casscf_cipsi/class.irp.f b/src/casscf_cipsi/class.irp.f new file mode 100644 index 00000000..7360a661 --- /dev/null +++ b/src/casscf_cipsi/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the CAS case, all those are always false except do_only_cas + END_DOC + do_only_cas = .True. + do_only_1h1p = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/casscf_cipsi/dav_sx_mat.irp.f b/src/casscf_cipsi/dav_sx_mat.irp.f new file mode 100644 index 00000000..1e24f0e2 --- /dev/null +++ b/src/casscf_cipsi/dav_sx_mat.irp.f @@ -0,0 +1,45 @@ + + +subroutine davidson_diag_sx_mat(N_st, u_in, energies) + implicit none + integer, intent(in) :: N_st + double precision, intent(out) :: u_in(nMonoEx+1,n_states_diag), energies(N_st) + integer :: i,j,N_st_tmp, dim_in, sze, N_st_diag_in + integer, allocatable :: list_guess(:) + double precision, allocatable :: H_jj(:) + logical :: converged + N_st_diag_in = n_states_diag + provide SXmatrix + sze = nMonoEx+1 + dim_in = sze + allocate(H_jj(sze), list_guess(sze)) + H_jj(1) = 0.d0 + N_st_tmp = 1 + list_guess(1) = 1 + do j = 2, nMonoEx+1 + H_jj(j) = SXmatrix(j,j) + if(H_jj(j).lt.0.d0)then + list_guess(N_st_tmp) = j + N_st_tmp += 1 + endif + enddo + if(N_st_tmp .ne. N_st)then + print*,'Pb in davidson_diag_sx_mat' + print*,'N_st_tmp .ne. N_st' + print*,N_st_tmp, N_st + stop + endif + print*,'Number of possibly interesting states = ',N_st + print*,'Corresponding diagonal elements of the SX matrix ' + u_in = 0.d0 + do i = 1, min(N_st, N_st_diag_in) +! do i = 1, N_st + j = list_guess(i) + print*,'i,j',i,j + print*,'SX(i,i) = ',H_jj(j) + u_in(j,i) = 1.d0 + enddo + call davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,SXmatrix) + print*,'energies = ',energies + +end diff --git a/src/casscf_cipsi/densities.irp.f b/src/casscf_cipsi/densities.irp.f new file mode 100644 index 00000000..bebcf5d7 --- /dev/null +++ b/src/casscf_cipsi/densities.irp.f @@ -0,0 +1,67 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, D0tu, (n_act_orb,n_act_orb) ] + implicit none + BEGIN_DOC + ! the first-order density matrix in the basis of the starting MOs. + ! matrix is state averaged. + END_DOC + integer :: t,u + + do u=1,n_act_orb + do t=1,n_act_orb + D0tu(t,u) = one_e_dm_mo_alpha_average( list_act(t), list_act(u) ) + & + one_e_dm_mo_beta_average ( list_act(t), list_act(u) ) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [real*8, P0tuvx, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! The second-order density matrix in the basis of the starting MOs ONLY IN THE RANGE OF ACTIVE MOS + ! The values are state averaged + ! + ! We use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + ! P0tuvx(p,q,r,s) = chemist notation : 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + END_DOC + implicit none + integer :: t,u,v,x + integer :: tt,uu,vv,xx + integer :: mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing the 2 body RDM on the active part' + endif + + P0tuvx= 0.d0 + if(fast_2rdm)then + do istate=1,N_states + do x = 1, n_act_orb + do v = 1, n_act_orb + do u = 1, n_act_orb + do t = 1, n_act_orb + ! 1 1 2 2 1 2 1 2 + P0tuvx(t,u,v,x) = 0.5d0 * state_av_act_2_rdm_spin_trace_mo(t,v,u,x) + enddo + enddo + enddo + enddo + enddo + else + P0tuvx = P0tuvx_peter + endif + +END_PROVIDER diff --git a/src/casscf_cipsi/densities_peter.irp.f b/src/casscf_cipsi/densities_peter.irp.f new file mode 100644 index 00000000..ee7414da --- /dev/null +++ b/src/casscf_cipsi/densities_peter.irp.f @@ -0,0 +1,150 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, P0tuvx_peter, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! the second-order density matrix in the basis of the starting MOs + ! matrices are state averaged + ! + ! we use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + END_DOC + implicit none + integer :: t,u,v,x,mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing density matrix P0' + endif + + P0tuvx_peter = 0.d0 + + ! first loop: we apply E_tu, once for D_tu, once for -P_tvvu + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do t=1,n_act_orb + ipart=list_act(t) + do u=1,n_act_orb + ihole=list_act(u) + ! apply E_tu + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! det_mu_ex1 is in the list + if (nu1.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu1,istate)*phase1 + ! and we fill P0_tvvu + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + ! det_mu_ex2 is in the list + if (nu2.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu2,istate)*phase2 + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + end do + end do + end do + ! now we do the double excitation E_tu E_vx |0> + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do v=1,n_act_orb + ipart=list_act(v) + do x=1,n_act_orb + ihole=list_act(x) + ! apply E_vx + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! we apply E_tu to the first resultant determinant, thus E_tu E_vx |0> + if (ierr1.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex1,det_mu_ex11,N_int) + call det_copy(det_mu_ex1,det_mu_ex12,N_int) + call do_spinfree_mono_excitation(det_mu_ex1,det_mu_ex11& + ,det_mu_ex12,nu11,nu12,jhole,jpart,phase11,phase12,ierr11,ierr12) + if (nu11.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu11,istate)& + *phase11*phase1 + end do + end if + if (nu12.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu12,istate)& + *phase12*phase1 + end do + end if + end do + end do + end if + + ! we apply E_tu to the second resultant determinant + if (ierr2.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex2,det_mu_ex21,N_int) + call det_copy(det_mu_ex2,det_mu_ex22,N_int) + call do_spinfree_mono_excitation(det_mu_ex2,det_mu_ex21& + ,det_mu_ex22,nu21,nu22,jhole,jpart,phase21,phase22,ierr21,ierr22) + if (nu21.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu21,istate)& + *phase21*phase2 + end do + end if + if (nu22.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu22,istate)& + *phase22*phase2 + end do + end if + end do + end do + end if + + end do + end do + end do + + ! we average by just dividing by the number of states + do x=1,n_act_orb + do v=1,n_act_orb + do u=1,n_act_orb + do t=1,n_act_orb + P0tuvx_peter(t,u,v,x)*=0.5D0/dble(N_states) + end do + end do + end do + end do + +END_PROVIDER diff --git a/src/casscf_cipsi/det_manip.irp.f b/src/casscf_cipsi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_cipsi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_cipsi/driver_optorb.irp.f b/src/casscf_cipsi/driver_optorb.irp.f new file mode 100644 index 00000000..2e3e02dc --- /dev/null +++ b/src/casscf_cipsi/driver_optorb.irp.f @@ -0,0 +1,3 @@ +subroutine driver_optorb + implicit none +end diff --git a/src/casscf_cipsi/get_energy.irp.f b/src/casscf_cipsi/get_energy.irp.f new file mode 100644 index 00000000..cfb26b59 --- /dev/null +++ b/src/casscf_cipsi/get_energy.irp.f @@ -0,0 +1,51 @@ +program print_2rdm + implicit none + BEGIN_DOC + ! get the active part of the bielectronic energy on a given wave function. + ! + ! useful to test the active part of the spin trace 2 rdms + END_DOC +!no_vvvv_integrals = .True. + read_wf = .True. +!touch read_wf no_vvvv_integrals +!call routine +!call routine_bis + call print_grad +end + +subroutine print_grad + implicit none + integer :: i + do i = 1, nMonoEx + if(dabs(gradvec2(i)).gt.1.d-5)then + print*,'' + print*,i,gradvec2(i),excit(:,i) + endif + enddo +end + +subroutine routine + integer :: i,j,k,l + integer :: ii,jj,kk,ll + double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral + thr = 1.d-10 + + + accu = 0.d0 + do ll = 1, n_act_orb + l = list_act(ll) + do kk = 1, n_act_orb + k = list_act(kk) + do jj = 1, n_act_orb + j = list_act(jj) + do ii = 1, n_act_orb + i = list_act(ii) + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + accu(1) += state_av_act_2_rdm_spin_trace_mo(ii,jj,kk,ll) * integral + enddo + enddo + enddo + enddo + print*,'accu = ',accu(1) + +end diff --git a/src/casscf_cipsi/grad_old.irp.f b/src/casscf_cipsi/grad_old.irp.f new file mode 100644 index 00000000..d60a60c8 --- /dev/null +++ b/src/casscf_cipsi/grad_old.irp.f @@ -0,0 +1,74 @@ + +BEGIN_PROVIDER [real*8, gradvec_old, (nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate + real*8 :: res + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + call calc_grad_elem(ihole,ipart,res) + gradvec_old(indx)=res + end do + + real*8 :: norm_grad + norm_grad=0.d0 + do indx=1,nMonoEx + norm_grad+=gradvec_old(indx)*gradvec_old(indx) + end do + norm_grad=sqrt(norm_grad) + if (bavard) then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via <0|EH|0>) : ', norm_grad + write(6,*) + endif + + +END_PROVIDER + +subroutine calc_grad_elem(ihole,ipart,res) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 , q=hole, p=particle + END_DOC + implicit none + integer :: ihole,ipart,mu,iii,ispin,ierr,nu,istate + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_psi_array(N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res=0.D0 + + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + call i_H_psi(det_mu_ex,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase + end do + end if + end do + end do + + ! state-averaged gradient + res*=2.D0/dble(N_states) + +end subroutine calc_grad_elem + diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f new file mode 100644 index 00000000..a1c5e947 --- /dev/null +++ b/src/casscf_cipsi/gradient.irp.f @@ -0,0 +1,215 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + + if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)] +&BEGIN_PROVIDER [real*8, norm_grad_vec2] +&BEGIN_PROVIDER [real*8, norm_grad_vec2_tab, (3)] + BEGIN_DOC + ! calculate the orbital gradient from density + ! matrices and integrals; Siegbahn et al, Phys Scr 1980 + ! eqs 14 a,b,c + END_DOC + implicit none + integer :: i,t,a,indx + real*8 :: gradvec_it,gradvec_ia,gradvec_ta + + indx=0 + norm_grad_vec2_tab = 0.d0 + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx+=1 + gradvec2(indx)=gradvec_it(i,t) + norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do i=1,n_core_inact_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ia(i,a) + norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ta(t,a) + norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx) + end do + end do + + norm_grad_vec2=0.d0 + do indx=1,nMonoEx + norm_grad_vec2+=gradvec2(indx)*gradvec2(indx) + end do + do i = 1, 3 + norm_grad_vec2_tab(i) = dsqrt(norm_grad_vec2_tab(i)) + enddo + norm_grad_vec2=sqrt(norm_grad_vec2) + if(bavard)then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2 + write(6,*) + endif + +END_PROVIDER + +real*8 function gradvec_it(i,t) + BEGIN_DOC + ! the orbital gradient core/inactive -> active + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t + + integer :: ii,tt,v,vv,x,y + integer :: x3,y3 + + ii=list_core_inact(i) + tt=list_act(t) + gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii)) + gradvec_it-=occnum(tt)*Fipq(ii,tt) + do v=1,n_act_orb ! active + vv=list_act(v) + do x=1,n_act_orb ! active + x3=x+n_core_inact_orb ! list_act(x) + do y=1,n_act_orb ! active + y3=y+n_core_inact_orb ! list_act(y) + ! Gamma(2) a a a a 1/r12 i a a a + gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) + end do + end do + end do + gradvec_it*=2.D0 +end function gradvec_it + +real*8 function gradvec_ia(i,a) + BEGIN_DOC + ! the orbital gradient core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,ii,aa + + ii=list_core_inact(i) + aa=list_virt(a) + gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii)) + gradvec_ia*=2.D0 + +end function gradvec_ia + +real*8 function gradvec_ta(t,a) + BEGIN_DOC + ! the orbital gradient active -> virtual + ! we assume natural orbitals + END_DOC + implicit none + integer :: t,a,tt,aa,v,vv,x,y + + tt=list_act(t) + aa=list_virt(a) + gradvec_ta=0.D0 + gradvec_ta+=occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + gradvec_ta+=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + gradvec_ta*=2.D0 + +end function gradvec_ta + diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f new file mode 100644 index 00000000..458c6aa6 --- /dev/null +++ b/src/casscf_cipsi/hessian.irp.f @@ -0,0 +1,539 @@ +use bitmasks + +real*8 function hessmat_itju(i,t,j,u) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> active + ! i, t, j, u are list indices, the corresponding orbitals are ii,tt,jj,uu + ! + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj + real*8 :: term,t2 + + ii=list_core_inact(i) + tt=list_act(t) + if (i.eq.j) then + if (t.eq.u) then + ! diagonal element + term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & + -2.D0*(Fipq(ii,ii)+Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i)) + term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt) & + -bielec_pqxx_no(tt,tt,i,i)) + term-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + else + ! it/iu, t != u + uu=list_act(u) + term=2.D0*(Fipq(tt,uu)+Fapq(tt,uu)) + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=occnum(tt)*Fipq(uu,tt) + term-=(occnum(tt)+occnum(uu)) & + *(3.D0*bielec_PxxQ_no(tt,i,i,uu)-bielec_PQxx_no(uu,tt,i,i)) + do v=1,n_act_orb + vv=list_act(v) + ! term-=D0tu(u,v)*Fipq(tt,vv) ! published, but inverting t and u seems more correct + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(u,v,y,xx) + end do + end do + end do + end if + else + ! it/ju + jj=list_core_inact(j) + uu=list_act(u) + if (t.eq.u) then + term=occnum(tt)*Fipq(ii,jj) + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + else + term=0.D0 + end if + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=(occnum(tt)+occnum(uu))* & + (4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(uu,tt,i,j)) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,j) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,j,xx)) + end do + end do + end if + + term*=2.D0 + hessmat_itju=term + +end function hessmat_itju + +real*8 function hessmat_itja(i,t,j,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> virtual + END_DOC + implicit none + integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y + real*8 :: term + + ! it/ja + ii=list_core_inact(i) + tt=list_act(t) + jj=list_core_inact(j) + aa=list_virt(a) + term=2.D0*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + term-=occnum(tt)*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + if (i.eq.j) then + term+=2.D0*(Fipq(aa,tt)+Fapq(aa,tt)) + term-=0.5D0*occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + end if + term*=2.D0 + hessmat_itja=term + +end function hessmat_itja + +real*8 function hessmat_itua(i,t,u,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, active -> virtual + END_DOC + implicit none + integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 + real*8 :: term + + ii=list_core_inact(i) + tt=list_act(t) + t3=t+n_core_inact_orb + uu=list_act(u) + u3=u+n_core_inact_orb + aa=list_virt(a) + if (t.eq.u) then + term=-occnum(tt)*Fipq(aa,ii) + else + term=0.D0 + end if + term-=occnum(uu)*(bielec_pqxx_no(aa,ii,t3,u3)-4.D0*bielec_pqxx_no(aa,uu,t3,i)& + +bielec_pxxq_no(aa,t3,u3,ii)) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + integer :: x3 + xx=list_act(x) + x3=x+n_core_inact_orb + term-=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,ii,v3,x3) & + +(P0tuvx_no(t,v,u,x)+P0tuvx_no(t,v,x,u)) & + *bielec_pqxx_no(aa,xx,v3,i)) + end do + end do + if (t.eq.u) then + term+=Fipq(aa,ii)+Fapq(aa,ii) + end if + term*=2.D0 + hessmat_itua=term + +end function hessmat_itua + +real*8 function hessmat_iajb(i,a,j,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,j,b,ii,aa,jj,bb + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + if (i.eq.j) then + if (a.eq.b) then + ! ia/ia + term=2.D0*(Fipq(aa,aa)+Fapq(aa,aa)-Fipq(ii,ii)-Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,aa)-bielec_pqxx_no(aa,aa,i,i)) + else + bb=list_virt(b) + ! ia/ib + term=2.D0*(Fipq(aa,bb)+Fapq(aa,bb)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,bb)-bielec_pqxx_no(aa,bb,i,i)) + end if + else + ! ia/jb + jj=list_core_inact(j) + bb=list_virt(b) + term=2.D0*(4.D0*bielec_pxxq_no(aa,i,j,bb)-bielec_pqxx_no(aa,bb,i,j) & + -bielec_pxxq_no(aa,j,i,bb)) + if (a.eq.b) then + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + end if + end if + term*=2.D0 + hessmat_iajb=term + +end function hessmat_iajb + +real*8 function hessmat_iatb(i,a,t,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, active -> virtual + END_DOC + implicit none + integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + tt=list_act(t) + bb=list_virt(b) + t3=t+n_core_inact_orb + term=occnum(tt)*(4.D0*bielec_pxxq_no(aa,i,t3,bb)-bielec_pxxq_no(aa,t3,i,bb)& + -bielec_pqxx_no(aa,bb,i,t3)) + if (a.eq.b) then + term-=Fipq(tt,ii)+Fapq(tt,ii) + term-=0.5D0*occnum(tt)*Fipq(tt,ii) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,ii) + end do + end do + end do + end if + term*=2.D0 + hessmat_iatb=term + +end function hessmat_iatb + +real*8 function hessmat_taub(t,a,u,b) + BEGIN_DOC + ! the orbital hessian for act->virt,act->virt + END_DOC + implicit none + integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y + integer :: v3,x3 + real*8 :: term,t1,t2,t3 + + tt=list_act(t) + aa=list_virt(a) + if (t == u) then + if (a == b) then + ! ta/ta + t1=occnum(tt)*Fipq(aa,aa) + t2=0.D0 + t3=0.D0 + t1-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(aa,x3,v3,aa)) + do y=1,n_act_orb + t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + term=t1+t2+t3 + else + bb=list_virt(b) + ! ta/tb b/=a + term=occnum(tt)*Fipq(aa,bb) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + end if + else + ! ta/ub t/=u + uu=list_act(u) + bb=list_virt(b) + term=0.D0 + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + if (a.eq.b) then + term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) + do v=1,n_act_orb + do y=1,n_act_orb + do x=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) + term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) + end do + end do + end do + end if + + end if + + term*=2.D0 + hessmat_taub=term + +end function hessmat_taub + +BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] + BEGIN_DOC + ! the diagonal of the Hessian, needed for the Davidson procedure + END_DOC + implicit none + integer :: i,t,a,indx,indx_shift + real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,t,a,indx_shift) + + !$OMP DO + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + hessdiag(indx)=hessmat_itju(i,t,i,t) + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_iajb(i,a,i,a) + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_taub(t,a,t,a) + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] + implicit none + integer :: i,j,t,u,a,b + integer :: indx,indx_tmp, jndx, jndx_tmp + integer :: ustart,bstart + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + ! c-a c-v a-v + ! c-a | X X X + ! c-v | X X + ! a-v | X + + provide mo_two_e_integrals_in_map + + hessmat = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,list_idx_c_a,n_core_inact_orb,n_act_orb,mat_idx_c_a) & + !$OMP PRIVATE(indx_tmp,indx,i,t,j,u,ustart,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-active > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-active excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + jndx = mat_idx_c_a(j,u) + hessmat(jndx,indx) = hessmat_itju(i,t,j,u) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_c_v_prov,list_idx_c_a,list_idx_c_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,j,a,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-VIRTUAL excitations + do jndx_tmp = 1, n_c_v_prov + jndx = list_idx_c_v(1,jndx_tmp) + j = list_idx_c_v(2,jndx_tmp) + a = list_idx_c_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itja(i,t,j,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_a_v_prov,list_idx_c_a,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,u,a,jndx) + + !$OMP DO +!!!! < Core-active| H |ACTIVE-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! ACTIVE-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + u = list_idx_a_v(2,jndx_tmp) + a = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itua(i,t,u,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + if(hess_cv_cv)then + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,list_idx_c_v,n_core_inact_orb,n_virt_orb,mat_idx_c_v) & + !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx) + !$OMP DO +!!!!! < Core-VIRTUAL | H |Core-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Core-VIRTUAL excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_c_v(j,b) + hessmat(jndx,indx) = hessmat_iajb(i,a,j,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + endif + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,n_a_v_prov,list_idx_c_v,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,a,t,b,jndx) + + !$OMP DO +!!!! < Core-VIRTUAL | H |Active-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + t = list_idx_a_v(2,jndx_tmp) + b = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_iatb(i,a,t,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_a_v_prov,list_idx_a_v,n_act_orb,n_virt_orb,mat_idx_a_v) & + !$OMP PRIVATE(indx_tmp,indx,t,a,u,b,bstart,jndx) + + !$OMP DO +!!!! < Active-VIRTUAL | H |Active-VIRTUAL > + ! Active-VIRTUAL excitations + do indx_tmp = 1, n_a_v_prov + indx = list_idx_a_v(1,indx_tmp) + t = list_idx_a_v(2,indx_tmp) + a = list_idx_a_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_a_v(u,b) + hessmat(jndx,indx) = hessmat_taub(t,a,u,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + +END_PROVIDER diff --git a/src/casscf_cipsi/hessian_old.irp.f b/src/casscf_cipsi/hessian_old.irp.f new file mode 100644 index 00000000..d17f1f0a --- /dev/null +++ b/src/casscf_cipsi/hessian_old.irp.f @@ -0,0 +1,310 @@ + +use bitmasks +BEGIN_PROVIDER [real*8, hessmat_old, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! calculate the orbital hessian 2 + ! + + by hand, + ! determinant per determinant, as for the gradient + ! + ! we assume that we have natural active orbitals + END_DOC + implicit none + integer :: indx,ihole,ipart + integer :: jndx,jhole,jpart + character*3 :: iexc,jexc + real*8 :: res + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_old ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + + do indx=1,nMonoEx + do jndx=1,nMonoEx + hessmat_old(indx,jndx)=0.D0 + end do + end do + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + iexc=excit_class(indx) + do jndx=indx,nMonoEx + jhole=excit(1,jndx) + jpart=excit(2,jndx) + jexc=excit_class(jndx) + call calc_hess_elem(ihole,ipart,jhole,jpart,res) + hessmat_old(indx,jndx)=res + hessmat_old(jndx,indx)=res + end do + end do + +END_PROVIDER + +subroutine calc_hess_elem(ihole,ipart,jhole,jpart,res) + BEGIN_DOC + ! eq 19 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 + ! + + + ! average over all states is performed. + ! no transition between states. + END_DOC + implicit none + integer :: ihole,ipart,ispin,mu,istate + integer :: jhole,jpart,jspin + integer :: mu_pq, mu_pqrs, mu_rs, mu_rspq, nu_rs,nu + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:) + integer(bit_kind), allocatable :: det_nu(:,:) + integer(bit_kind), allocatable :: det_mu_pq(:,:) + integer(bit_kind), allocatable :: det_mu_rs(:,:) + integer(bit_kind), allocatable :: det_nu_rs(:,:) + integer(bit_kind), allocatable :: det_mu_pqrs(:,:) + integer(bit_kind), allocatable :: det_mu_rspq(:,:) + real*8 :: i_H_psi_array(N_states),phase,phase2,phase3 + real*8 :: i_H_j_element + allocate(det_mu(N_int,2)) + allocate(det_nu(N_int,2)) + allocate(det_mu_pq(N_int,2)) + allocate(det_mu_rs(N_int,2)) + allocate(det_nu_rs(N_int,2)) + allocate(det_mu_pqrs(N_int,2)) + allocate(det_mu_rspq(N_int,2)) + integer :: mu_pq_possible + integer :: mu_rs_possible + integer :: nu_rs_possible + integer :: mu_pqrs_possible + integer :: mu_rspq_possible + + res=0.D0 + + ! the terms <0|E E H |0> + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation pq on it + call det_copy(det_mu,det_mu_pq,N_int) + call do_signed_mono_excitation(det_mu,det_mu_pq,mu_pq & + ,ihole,ipart,ispin,phase,mu_pq_possible) + if (mu_pq_possible.eq.1) then + ! possible, but not necessarily in the list + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jhole,jpart,jspin,phase2,mu_pqrs_possible) + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + ! try the de-excitation with opposite sign + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jpart,jhole,jspin,phase2,mu_pqrs_possible) + phase2=-phase2 + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + end do + end if + ! exchange the notion of pq and rs + ! do the monoexcitation rs on the initial determinant + call det_copy(det_mu,det_mu_rs,N_int) + call do_signed_mono_excitation(det_mu,det_mu_rs,mu_rs & + ,jhole,jpart,ispin,phase2,mu_rs_possible) + if (mu_rs_possible.eq.1) then + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ihole,ipart,jspin,phase3,mu_rspq_possible) + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + ! we may try the de-excitation, with opposite sign + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ipart,ihole,jspin,phase3,mu_rspq_possible) + phase3=-phase3 + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + end do + end if + ! + ! the operator E H E, we have to do a double loop over the determinants + ! we still have the determinant mu_pq and the phase in memory + if (mu_pq_possible.eq.1) then + do nu=1,N_det + call det_extract(det_nu,nu,N_int) + do jspin=1,2 + call det_copy(det_nu,det_nu_rs,N_int) + call do_signed_mono_excitation(det_nu,det_nu_rs,nu_rs & + ,jhole,jpart,jspin,phase2,nu_rs_possible) + ! excitation possible ? + if (nu_rs_possible.eq.1) then + call i_H_j(det_mu_pq,det_nu_rs,N_int,i_H_j_element) + do istate=1,N_states + res+=2.D0*i_H_j_element*psi_coef(mu,istate) & + *psi_coef(nu,istate)*phase*phase2 + end do + end if + end do + end do + end if + end do + end do + + ! state-averaged Hessian + res*=1.D0/dble(N_states) + +end subroutine calc_hess_elem + +BEGIN_PROVIDER [real*8, hessmat_peter, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! explicit hessian matrix from density matrices and integrals + ! of course, this will be used for a direct Davidson procedure later + ! we will not store the matrix in real life + ! formulas are broken down as functions for the 6 classes of matrix elements + ! + END_DOC + implicit none + integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart,indx_shift + + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_peter ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + provide mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat_peter,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,jndx,j,ustart,t,u,a,bstart,indx_shift) + + !$OMP DO + ! (DOUBLY OCCUPIED ---> ACT ) + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + jndx=indx + ! (DOUBLY OCCUPIED ---> ACT ) + do j=i,n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + hessmat_peter(jndx,indx)=hessmat_itju(i,t,j,u) + jndx+=1 + end do + end do + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=1,n_core_inact_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itja(i,t,j,a) + jndx+=1 + end do + end do + ! (ACTIVE ---> VIRTUAL) + do u=1,n_act_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itua(i,t,u,a) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + jndx=indx + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=i,n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iajb(i,a,j,b) + jndx+=1 + end do + end do + ! (ACT ---> VIRTUAL) + do t=1,n_act_orb + do b=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iatb(i,a,t,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + ! (ACT ---> VIRTUAL) + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + jndx=indx + ! (ACT ---> VIRTUAL) + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_taub(t,a,u,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + + do jndx=1,nMonoEx + do indx=1,jndx-1 + hessmat_peter(indx,jndx) = hessmat_peter(jndx,indx) + enddo + enddo + + +END_PROVIDER + diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f new file mode 100644 index 00000000..e4568405 --- /dev/null +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -0,0 +1,80 @@ +BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] + BEGIN_DOC + ! the inactive Fock matrix, in molecular orbitals + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)=one_ints_no(p,q) + end do + end do + + ! the inactive Fock matrix + do k=1,n_core_inact_orb + kk=list_core_inact(k) + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] + BEGIN_DOC + ! the active active Fock matrix, in molecular orbitals + ! we create them in MOs, quite expensive + ! + ! for an implementation in AOs we need first the natural orbitals + ! for forming an active density matrix in AOs + ! + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + Fapq = 0.d0 + + ! the active Fock matrix, D0tu is diagonal + do t=1,n_act_orb + tt=list_act(t) + do q=1,mo_num + do p=1,mo_num + Fapq(p,q)+=occnum(tt) & + *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q)) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the effective Fock matrix over MOs' + write(6,*) + + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + write(6,*) + write(6,*) ' the diagonal of the active Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f new file mode 100644 index 00000000..9ce90304 --- /dev/null +++ b/src/casscf_cipsi/natorb.irp.f @@ -0,0 +1,231 @@ + BEGIN_PROVIDER [real*8, occnum, (mo_num)] + implicit none + BEGIN_DOC + ! MO occupation numbers + END_DOC + + integer :: i + occnum=0.D0 + do i=1,n_core_inact_orb + occnum(list_core_inact(i))=2.D0 + end do + + do i=1,n_act_orb + occnum(list_act(i))=occ_act(i) + end do + + if (bavard) then + write(6,*) ' occupation numbers ' + do i=1,mo_num + write(6,*) i,occnum(i) + end do + endif + +END_PROVIDER + + + BEGIN_PROVIDER [ real*8, natorbsCI, (n_act_orb,n_act_orb) ] +&BEGIN_PROVIDER [ real*8, occ_act, (n_act_orb) ] + implicit none + BEGIN_DOC + ! Natural orbitals of CI + END_DOC + integer :: i, j + double precision :: Vt(n_act_orb,n_act_orb) + +! call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) + call svd(D0tu, size(D0tu,1), natorbsCI,size(natorbsCI,1), occ_act, Vt, size(Vt,1),n_act_orb,n_act_orb) + + if (bavard) then + write(6,*) ' found occupation numbers as ' + do i=1,n_act_orb + write(6,*) i,occ_act(i) + end do + + integer :: nmx + real*8 :: xmx + do i=1,n_act_orb + ! largest element of the eigenvector should be positive + xmx=0.D0 + nmx=0 + do j=1,n_act_orb + if (abs(natOrbsCI(j,i)).gt.xmx) then + nmx=j + xmx=abs(natOrbsCI(j,i)) + end if + end do + xmx=sign(1.D0,natOrbsCI(nmx,i)) + do j=1,n_act_orb + natOrbsCI(j,i)*=xmx + end do + + write(6,*) ' Eigenvector No ',i + write(6,'(5(I3,F12.5))') (j,natOrbsCI(j,i),j=1,n_act_orb) + end do + end if + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! 4-index transformation of 2part matrices + END_DOC + integer :: i,j,k,l,p,q + real*8 :: d(n_act_orb) + + ! index per index + ! first quarter + P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:) + + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(p,j,k,l)=d(p) + end do + end do + end do + end do + ! 2nd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,p,k,l)=d(p) + end do + end do + end do + end do + ! 3rd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,p,l)=d(p) + end do + end do + end do + end do + ! 4th quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,l,p)=d(p) + end do + end do + end do + end do + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Transformed one-e integrals + END_DOC + integer :: i,j, p, q + real*8 :: d(n_act_orb) + one_ints_no(:,:)=mo_one_e_integrals(:,:) + + ! 1st half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(list_act(p),j)=d(p) + end do + end do + + ! 2nd half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(j,list_act(p))=d(p) + end do + end do +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] + implicit none + BEGIN_DOC +! FCI natural orbitals + END_DOC + + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) +END_PROVIDER + diff --git a/src/casscf_cipsi/neworbs.irp.f b/src/casscf_cipsi/neworbs.irp.f new file mode 100644 index 00000000..a7cebbb2 --- /dev/null +++ b/src/casscf_cipsi/neworbs.irp.f @@ -0,0 +1,253 @@ + BEGIN_PROVIDER [real*8, SXmatrix, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [integer, n_guess_sx_mat ] + implicit none + BEGIN_DOC + ! Single-excitation matrix + END_DOC + + integer :: i,j + + do i=1,nMonoEx+1 + do j=1,nMonoEx+1 + SXmatrix(i,j)=0.D0 + end do + end do + + do i=1,nMonoEx + SXmatrix(1,i+1)=gradvec2(i) + SXmatrix(1+i,1)=gradvec2(i) + end do + if(diag_hess_cas)then + do i = 1, nMonoEx + SXmatrix(i+1,i+1) = hessdiag(i) + enddo + else + do i=1,nMonoEx + do j=1,nMonoEx + SXmatrix(i+1,j+1)=hessmat(i,j) + SXmatrix(j+1,i+1)=hessmat(i,j) + end do + end do + endif + + do i = 1, nMonoEx + SXmatrix(i+1,i+1) += level_shift_casscf + enddo + n_guess_sx_mat = 1 + do i = 1, nMonoEx + if(SXmatrix(i+1,i+1).lt.0.d0 )then + n_guess_sx_mat += 1 + endif + enddo + if (bavard) then + do i=2,nMonoEx + write(6,*) ' diagonal of the Hessian : ',i,hessmat(i,i) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, SXeigenvec, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [real*8, SXeigenval, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Eigenvectors/eigenvalues of the single-excitation matrix + END_DOC + if(nMonoEx+1.gt.n_det_max_full)then + if(bavard)then + print*,'Using the Davidson algorithm to diagonalize the SXmatrix' + endif + double precision, allocatable :: u_in(:,:),energies(:) + allocate(u_in(nMonoEx+1,n_states_diag),energies(n_guess_sx_mat)) + call davidson_diag_sx_mat(n_guess_sx_mat, u_in, energies) + integer :: i,j + SXeigenvec = 0.d0 + SXeigenval = 0.d0 + do i = 1, n_guess_sx_mat + SXeigenval(i) = energies(i) + do j = 1, nMonoEx+1 + SXeigenvec(j,i) = u_in(j,i) + enddo + enddo + else + if(bavard)then + print*,'Diagonalize the SXmatrix with Jacobi' + endif + call lapack_diag(SXeigenval,SXeigenvec,SXmatrix,nMonoEx+1,nMonoEx+1) + endif + if (bavard) then + write(6,*) ' SXdiag : lowest eigenvalues ' + write(6,*) ' 1 - ',SXeigenval(1),SXeigenvec(1,1) + if(n_guess_sx_mat.gt.0)then + write(6,*) ' 2 - ',SXeigenval(2),SXeigenvec(1,2) + write(6,*) ' 3 - ',SXeigenval(3),SXeigenvec(1,3) + write(6,*) ' 4 - ',SXeigenval(4),SXeigenvec(1,4) + write(6,*) ' 5 - ',SXeigenval(5),SXeigenvec(1,5) + endif + write(6,*) + write(6,*) ' SXdiag : lowest eigenvalue = ',SXeigenval(1) + endif +END_PROVIDER + + BEGIN_PROVIDER [real*8, energy_improvement] + implicit none + if(state_following_casscf)then + energy_improvement = SXeigenval(best_vector_ovrlp_casscf) + else + energy_improvement = SXeigenval(1) + endif + END_PROVIDER + + + + BEGIN_PROVIDER [ integer, best_vector_ovrlp_casscf ] +&BEGIN_PROVIDER [ double precision, best_overlap_casscf ] + implicit none + integer :: i + double precision :: c0 + best_overlap_casscf = 0.D0 + best_vector_ovrlp_casscf = -1000 + do i=1,nMonoEx+1 + if (SXeigenval(i).lt.0.D0) then + if (dabs(SXeigenvec(1,i)).gt.best_overlap_casscf) then + best_overlap_casscf=dabs(SXeigenvec(1,i)) + best_vector_ovrlp_casscf = i + end if + end if + end do + if(best_vector_ovrlp_casscf.lt.0)then + best_vector_ovrlp_casscf = minloc(SXeigenval,nMonoEx+1) + endif + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + if (bavard) then + write(6,*) ' SXdiag : eigenvalue for best overlap with ' + write(6,*) ' previous orbitals = ',SXeigenval(best_vector_ovrlp_casscf) + write(6,*) ' weight of the 1st element ',c0 + endif + END_PROVIDER + + BEGIN_PROVIDER [double precision, SXvector, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Best eigenvector of the single-excitation matrix + END_DOC + integer :: i + double precision :: c0 + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + do i=1,nMonoEx+1 + SXvector(i)=SXeigenvec(i,best_vector_ovrlp_casscf)/c0 + end do + END_PROVIDER + + +BEGIN_PROVIDER [double precision, NewOrbs, (ao_num,mo_num) ] + implicit none + BEGIN_DOC + ! Updated orbitals + END_DOC + integer :: i,j,ialph + + if(state_following_casscf)then + print*,'Using the state following casscf ' + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + else + if(best_vector_ovrlp_casscf.ne.1.and.n_orb_swap.ne.0)then + print*,'Taking the lowest root for the CASSCF' + print*,'!!! SWAPPING MOS !!!!!!' + level_shift_casscf *= 2.D0 + level_shift_casscf = min(level_shift_casscf,0.5d0) + print*,'level_shift_casscf = ',level_shift_casscf + NewOrbs = switch_mo_coef + !mo_coef = switch_mo_coef + !soft_touch mo_coef + !call save_mos_no_occ + !stop + else + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + endif + endif + +END_PROVIDER + +BEGIN_PROVIDER [real*8, Umat, (mo_num,mo_num) ] + implicit none + BEGIN_DOC + ! Orbital rotation matrix + END_DOC + integer :: i,j,indx,k,iter,t,a,ii,tt,aa + logical :: converged + + real*8 :: Tpotmat (mo_num,mo_num), Tpotmat2 (mo_num,mo_num) + real*8 :: Tmat(mo_num,mo_num) + real*8 :: f + + ! the orbital rotation matrix T + Tmat(:,:)=0.D0 + indx=1 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx+=1 + Tmat(ii,tt)= SXvector(indx) + Tmat(tt,ii)=-SXvector(indx) + end do + end do + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(ii,aa)= SXvector(indx) + Tmat(aa,ii)=-SXvector(indx) + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(tt,aa)= SXvector(indx) + Tmat(aa,tt)=-SXvector(indx) + end do + end do + + ! Form the exponential + + Tpotmat(:,:)=0.D0 + Umat(:,:) =0.D0 + do i=1,mo_num + Tpotmat(i,i)=1.D0 + Umat(i,i) =1.d0 + end do + iter=0 + converged=.false. + do while (.not.converged) + iter+=1 + f = 1.d0 / dble(iter) + Tpotmat2(:,:) = Tpotmat(:,:) * f + call dgemm('N','N', mo_num,mo_num,mo_num,1.d0, & + Tpotmat2, size(Tpotmat2,1), & + Tmat, size(Tmat,1), 0.d0, & + Tpotmat, size(Tpotmat,1)) + Umat(:,:) = Umat(:,:) + Tpotmat(:,:) + + converged = ( sum(abs(Tpotmat(:,:))) < 1.d-6).or.(iter>30) + end do +END_PROVIDER + + + diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f new file mode 100644 index 00000000..3cb90522 --- /dev/null +++ b/src/casscf_cipsi/reorder_orb.irp.f @@ -0,0 +1,70 @@ +subroutine reorder_orbitals_for_casscf + implicit none + BEGIN_DOC +! routine that reorders the orbitals of the CASSCF in terms block of core, active and virtual + END_DOC + integer :: i,j,iorb + integer, allocatable :: iorder(:),array(:) + allocate(iorder(mo_num),array(mo_num)) + do i = 1, n_core_orb + iorb = list_core(i) + array(iorb) = i + enddo + + do i = 1, n_inact_orb + iorb = list_inact(i) + array(iorb) = mo_num + i + enddo + + do i = 1, n_act_orb + iorb = list_act(i) + array(iorb) = 2 * mo_num + i + enddo + + do i = 1, n_virt_orb + iorb = list_virt(i) + array(iorb) = 3 * mo_num + i + enddo + + do i = 1, mo_num + iorder(i) = i + enddo + call isort(array,iorder,mo_num) + double precision, allocatable :: mo_coef_new(:,:) + allocate(mo_coef_new(ao_num,mo_num)) + do i = 1, mo_num + mo_coef_new(:,i) = mo_coef(:,iorder(i)) + enddo + mo_coef = mo_coef_new + touch mo_coef + + list_core_reverse = 0 + do i = 1, n_core_orb + list_core(i) = i + list_core_reverse(i) = i + mo_class(i) = "Core" + enddo + + list_inact_reverse = 0 + do i = 1, n_inact_orb + list_inact(i) = i + n_core_orb + list_inact_reverse(i+n_core_orb) = i + mo_class(i+n_core_orb) = "Inactive" + enddo + + list_act_reverse = 0 + do i = 1, n_act_orb + list_act(i) = n_core_inact_orb + i + list_act_reverse(n_core_inact_orb + i) = i + mo_class(n_core_inact_orb + i) = "Active" + enddo + + list_virt_reverse = 0 + do i = 1, n_virt_orb + list_virt(i) = n_core_inact_orb + n_act_orb + i + list_virt_reverse(n_core_inact_orb + n_act_orb + i) = i + mo_class(n_core_inact_orb + n_act_orb + i) = "Virtual" + enddo + touch list_core_reverse list_core list_inact list_inact_reverse list_act list_act_reverse list_virt list_virt_reverse + +end diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f new file mode 100644 index 00000000..8729c5af --- /dev/null +++ b/src/casscf_cipsi/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_casscf_energy(E(1:N_states)) + call ezfio_set_casscf_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/casscf_cipsi/superci_dm.irp.f b/src/casscf_cipsi/superci_dm.irp.f new file mode 100644 index 00000000..ee831c35 --- /dev/null +++ b/src/casscf_cipsi/superci_dm.irp.f @@ -0,0 +1,207 @@ + BEGIN_PROVIDER [double precision, super_ci_dm, (mo_num,mo_num)] + implicit none + BEGIN_DOC +! density matrix of the super CI matrix, in the basis of NATURAL ORBITALS OF THE CASCI WF +! +! This is obtained from annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 +! +! WARNING ::: in the equation B3.d there is a TYPO with a forgotten MINUS SIGN (see variable mat_tmp_dm_super_ci ) + END_DOC + super_ci_dm = 0.d0 + integer :: i,j,iorb,jorb + integer :: a,aorb,b,borb + integer :: t,torb,v,vorb,u,uorb,x,xorb + double precision :: c0,ci + c0 = SXeigenvec(1,1) + ! equation B3.a of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! loop over the core/inact + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(iorb,iorb) = 2.d0 ! first term of B3.a + ! loop over the core/inact + do j = 1, n_core_inact_orb + jorb = list_core_inact(j) + ! loop over the virtual + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(jorb,iorb) += -2.d0 * lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,jorb) ! second term in B3.a + enddo + do t = 1, n_act_orb + torb = list_act(t) + ! thrid term of the B3.a + super_ci_dm(jorb,iorb) += - lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(jorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.b of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(iorb,torb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,iorb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(iorb,torb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,iorb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + enddo + enddo + enddo + + ! equation B3.c of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,iorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + super_ci_dm(iorb,aorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + enddo + enddo + + ! equation B3.d of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(torb,torb) = occ_act(t) ! first term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + super_ci_dm(torb,torb) += - occ_act(x) * occ_act(t)* mat_tmp_dm_super_ci(x,x) ! second term involving the ONE-rdm + enddo + do u = 1, n_act_orb + uorb = list_act(u) + + ! second term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + do v = 1, n_act_orb + vorb = list_act(v) + super_ci_dm(torb,uorb) += 2.d0 * P0tuvx_no(v,x,t,u) * mat_tmp_dm_super_ci(v,x) ! second term involving the TWO-rdm + enddo + enddo + + ! third term of equation B3.d + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(torb,uorb) += lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(iorb,uorb) * (2.d0 - occ_act(t) - occ_act(u)) + enddo + + enddo + enddo + + ! equation B3.e of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,torb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,aorb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(aorb,torb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,aorb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.f of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do b = 1, n_virt_orb + borb= list_virt(b) + + ! First term of equation B3.f + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(borb,aorb) += 2.d0 * lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,borb) + enddo + + ! Second term of equation B3.f + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(borb,aorb) += lowest_super_ci_coef_mo(torb,aorb) * lowest_super_ci_coef_mo(torb,borb) * occ_act(t) + enddo + enddo + enddo + + END_PROVIDER + + BEGIN_PROVIDER [double precision, superci_natorb, (ao_num,mo_num) +&BEGIN_PROVIDER [double precision, superci_nat_occ, (mo_num) + implicit none + call general_mo_coef_new_as_svd_vectors_of_mo_matrix_eig(super_ci_dm,mo_num,mo_num,mo_num,NatOrbsFCI,superci_nat_occ,superci_natorb) + +END_PROVIDER + + BEGIN_PROVIDER [double precision, mat_tmp_dm_super_ci, (n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! computation of the term in [ ] in the equation B3.d of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! + ! !!!!! WARNING !!!!!! there is a TYPO: a MINUS SIGN SHOULD APPEAR in that term + END_DOC + integer :: a,aorb,i,iorb + integer :: x,xorb,v,vorb + mat_tmp_dm_super_ci = 0.d0 + do v = 1, n_act_orb + vorb = list_act(v) + do x = 1, n_act_orb + xorb = list_act(x) + do a = 1, n_virt_orb + aorb = list_virt(a) + mat_tmp_dm_super_ci(x,v) += lowest_super_ci_coef_mo(aorb,vorb) * lowest_super_ci_coef_mo(aorb,xorb) + enddo + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + ! MARK THE MINUS SIGN HERE !!!!!!!!!!! BECAUSE OF TYPO IN THE ORIGINAL PAPER + mat_tmp_dm_super_ci(x,v) -= lowest_super_ci_coef_mo(iorb,vorb) * lowest_super_ci_coef_mo(iorb,xorb) + enddo + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, lowest_super_ci_coef_mo, (mo_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + integer :: a, aorb,t, torb + double precision :: sqrt2 + + sqrt2 = 1.d0/dsqrt(2.d0) + do i = 1, nMonoEx + iorb = excit(1,i) + jorb = excit(2,i) + lowest_super_ci_coef_mo(iorb,jorb) = SXeigenvec(i+1,1) + lowest_super_ci_coef_mo(jorb,iorb) = SXeigenvec(i+1,1) + enddo + + ! a_{it} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,iorb) *= (2.d0 - occ_act(t))**(-0.5d0) + lowest_super_ci_coef_mo(iorb,torb) *= (2.d0 - occ_act(t))**(-0.5d0) + enddo + enddo + + ! a_{ia} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + lowest_super_ci_coef_mo(aorb,iorb) *= sqrt2 + lowest_super_ci_coef_mo(iorb,aorb) *= sqrt2 + enddo + enddo + + ! a_{ta} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,aorb) *= occ_act(t)**(-0.5d0) + lowest_super_ci_coef_mo(aorb,torb) *= occ_act(t)**(-0.5d0) + enddo + enddo + + END_PROVIDER + diff --git a/src/casscf_cipsi/swap_orb.irp.f b/src/casscf_cipsi/swap_orb.irp.f new file mode 100644 index 00000000..49af207c --- /dev/null +++ b/src/casscf_cipsi/swap_orb.irp.f @@ -0,0 +1,132 @@ + BEGIN_PROVIDER [double precision, SXvector_lowest, (nMonoEx)] + implicit none + integer :: i + do i=2,nMonoEx+1 + SXvector_lowest(i-1)=SXeigenvec(i,1) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, thresh_overlap_switch] + implicit none + thresh_overlap_switch = 0.5d0 + END_PROVIDER + + BEGIN_PROVIDER [integer, max_overlap, (nMonoEx)] +&BEGIN_PROVIDER [integer, n_max_overlap] +&BEGIN_PROVIDER [integer, dim_n_max_overlap] + implicit none + double precision, allocatable :: vec_tmp(:) + integer, allocatable :: iorder(:) + allocate(vec_tmp(nMonoEx),iorder(nMonoEx)) + integer :: i + do i = 1, nMonoEx + iorder(i) = i + vec_tmp(i) = -dabs(SXvector_lowest(i)) + enddo + call dsort(vec_tmp,iorder,nMonoEx) + n_max_overlap = 0 + do i = 1, nMonoEx + if(dabs(vec_tmp(i)).gt.thresh_overlap_switch)then + n_max_overlap += 1 + max_overlap(n_max_overlap) = iorder(i) + endif + enddo + dim_n_max_overlap = max(1,n_max_overlap) + END_PROVIDER + + BEGIN_PROVIDER [integer, orb_swap, (2,dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, index_orb_swap, (dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, n_orb_swap ] + implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features + integer :: i,imono,iorb,jorb,j + n_orb_swap = 0 + do i = 1, n_max_overlap + imono = max_overlap(i) + iorb = excit(1,imono) + jorb = excit(2,imono) + if (excit_class(imono) == "c-a" .and.hessmat(imono,imono).gt.0.d0)then ! core --> active rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = iorb ! core + orb_swap(2,n_orb_swap) = jorb ! active + index_orb_swap(n_orb_swap) = imono + else if (excit_class(imono) == "a-v" .and.hessmat(imono,imono).gt.0.d0)then ! active --> virtual rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = jorb ! virtual + orb_swap(2,n_orb_swap) = iorb ! active + index_orb_swap(n_orb_swap) = imono + endif + enddo + + integer,allocatable :: orb_swap_tmp(:,:) + allocate(orb_swap_tmp(2,dim_n_max_overlap)) + do i = 1, n_orb_swap + orb_swap_tmp(1,i) = orb_swap(1,i) + orb_swap_tmp(2,i) = orb_swap(2,i) + enddo + + integer(bit_kind), allocatable :: det_i(:),det_j(:) + allocate(det_i(N_int),det_j(N_int)) + logical, allocatable :: good_orb_rot(:) + allocate(good_orb_rot(n_orb_swap)) + integer, allocatable :: index_orb_swap_tmp(:) + allocate(index_orb_swap_tmp(dim_n_max_overlap)) + index_orb_swap_tmp = index_orb_swap + good_orb_rot = .True. + integer :: icount,k + do i = 1, n_orb_swap + if(.not.good_orb_rot(i))cycle + det_i = 0_bit_kind + call set_bit_to_integer(orb_swap(1,i),det_i,N_int) + call set_bit_to_integer(orb_swap(2,i),det_i,N_int) + do j = i+1, n_orb_swap + det_j = 0_bit_kind + call set_bit_to_integer(orb_swap(1,j),det_j,N_int) + call set_bit_to_integer(orb_swap(2,j),det_j,N_int) + icount = 0 + do k = 1, N_int + icount += popcnt(ior(det_i(k),det_j(k))) + enddo + if (icount.ne.4)then + good_orb_rot(i) = .False. + good_orb_rot(j) = .False. + exit + endif + enddo + enddo + icount = n_orb_swap + n_orb_swap = 0 + do i = 1, icount + if(good_orb_rot(i))then + n_orb_swap += 1 + index_orb_swap(n_orb_swap) = index_orb_swap_tmp(i) + orb_swap(1,n_orb_swap) = orb_swap_tmp(1,i) + orb_swap(2,n_orb_swap) = orb_swap_tmp(2,i) + endif + enddo + + if(n_orb_swap.gt.0)then + print*,'n_orb_swap = ',n_orb_swap + endif + do i = 1, n_orb_swap + print*,'imono = ',index_orb_swap(i) + print*,orb_swap(1,i),'-->',orb_swap(2,i) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, switch_mo_coef, (ao_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + switch_mo_coef = NatOrbsFCI + do i = 1, n_orb_swap + iorb = orb_swap(1,i) + jorb = orb_swap(2,i) + do j = 1, ao_num + switch_mo_coef(j,jorb) = NatOrbsFCI(j,iorb) + enddo + do j = 1, ao_num + switch_mo_coef(j,iorb) = NatOrbsFCI(j,jorb) + enddo + enddo + + END_PROVIDER diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f new file mode 100644 index 00000000..1d70e087 --- /dev/null +++ b/src/casscf_cipsi/tot_en.irp.f @@ -0,0 +1,101 @@ + BEGIN_PROVIDER [real*8, etwo] +&BEGIN_PROVIDER [real*8, eone] +&BEGIN_PROVIDER [real*8, eone_bis] +&BEGIN_PROVIDER [real*8, etwo_bis] +&BEGIN_PROVIDER [real*8, etwo_ter] +&BEGIN_PROVIDER [real*8, ecore] +&BEGIN_PROVIDER [real*8, ecore_bis] + implicit none + integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 + real*8 :: e_one_all,e_two_all + e_one_all=0.D0 + e_two_all=0.D0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + e_one_all+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + e_two_all+=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + end do + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + e_two_all+=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + end do + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do u=1,n_act_orb + uu=list_act(u) + e_one_all+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do v=1,n_act_orb + v3=v+n_core_inact_orb + do x=1,n_act_orb + x3=x+n_core_inact_orb + e_two_all +=P0tuvx(t,u,v,x)*bielec_PQxx(tt,uu,v3,x3) + end do + end do + end do + end do + ecore =nuclear_repulsion + ecore_bis=nuclear_repulsion + do i=1,n_core_inact_orb + ii=list_core_inact(i) + ecore +=2.D0*mo_one_e_integrals(ii,ii) + ecore_bis+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + ecore +=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + ecore_bis+=2.D0*bielec_PxxQ(ii,i,j,jj)-bielec_PxxQ(ii,j,j,ii) + end do + end do + eone =0.D0 + eone_bis=0.D0 + etwo =0.D0 + etwo_bis=0.D0 + etwo_ter=0.D0 + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + eone +=D0tu(t,u)*mo_one_e_integrals(tt,uu) + eone_bis+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do i=1,n_core_inact_orb + ii=list_core_inact(i) + eone +=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + eone_bis+=D0tu(t,u)*(2.D0*bielec_PxxQ(tt,u3,i,ii) & + -bielec_PxxQ(tt,i,i,uu)) + end do + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + real*8 :: h1,h2,h3 + h1=bielec_PQxx(tt,uu,v3,x3) + h2=bielec_PxxQ(tt,u3,v3,xx) + h3=bielecCI(t,u,v,xx) + etwo +=P0tuvx(t,u,v,x)*h1 + etwo_bis+=P0tuvx(t,u,v,x)*h2 + etwo_ter+=P0tuvx(t,u,v,x)*h3 + if ((h1.ne.h2).or.(h1.ne.h3)) then + write(6,9901) t,u,v,x,h1,h2,h3 + 9901 format('aie: ',4I4,3E20.12) + end if + end do + end do + end do + end do + +END_PROVIDER + + From 1e0e06d9cd705553501af87b417e719760b918da Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 19 Jun 2023 15:26:14 +0200 Subject: [PATCH 182/337] fixed bug in bi_ort_ints/three_body_ijmk.irp.f: deallocate(tmp1) is wrong --- src/bi_ort_ints/three_body_ijmk.irp.f | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index ee7e88ef..4a99fb1b 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -97,7 +97,7 @@ , tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) - deallocate(tmp1) +! deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num @@ -112,7 +112,7 @@ !$OMP END PARALLEL DO - +! allocate(tmp1(n_points_final_grid, 2, mo_num, mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i, l, ipoint) & @@ -195,7 +195,7 @@ , tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid & , 0.d0, tmp_4d, mo_num*mo_num) - deallocate(tmp1) +! deallocate(tmp1) !$OMP PARALLEL DO PRIVATE(i,j,k,m) do i = 1, mo_num From 2ef05d01c9033abccc5f8f7166b33418741b9420 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Jun 2023 23:39:53 +0200 Subject: [PATCH 183/337] j1b_type 4/104 modif --- src/ao_many_one_e_ints/listj1b.irp.f | 14 +++--- src/non_h_ints_mu/j12_nucl_utils.irp.f | 8 ++-- src/non_h_ints_mu/jast_deriv.irp.f | 6 +-- src/tc_keywords/EZFIO.cfg | 6 +++ src/tc_keywords/j1b_pen.irp.f | 64 +++++++++++++++++++++----- 5 files changed, 73 insertions(+), 25 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 02963605..33ca8085 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -62,6 +62,7 @@ END_PROVIDER double precision :: tmp_cent_x, tmp_cent_y, tmp_cent_z provide j1b_pen + provide j1b_pen_coef List_all_comb_b2_coef = 0.d0 List_all_comb_b2_expo = 0.d0 @@ -127,8 +128,8 @@ END_PROVIDER List_all_comb_b2_expo( 1) = 0.d0 List_all_comb_b2_cent(1:3,1) = 0.d0 do i = 1, nucl_num - List_all_comb_b2_coef( i+1) = -1.d0 - List_all_comb_b2_expo( i+1) = j1b_pen( i) + List_all_comb_b2_coef( i+1) = -1.d0 * j1b_pen_coef(i) + List_all_comb_b2_expo( i+1) = j1b_pen(i) List_all_comb_b2_cent(1,i+1) = nucl_coord(i,1) List_all_comb_b2_cent(2,i+1) = nucl_coord(i,2) List_all_comb_b2_cent(3,i+1) = nucl_coord(i,3) @@ -225,6 +226,7 @@ END_PROVIDER double precision :: dx, dy, dz, r2 provide j1b_pen + provide j1b_pen_coef List_all_comb_b3_coef = 0.d0 List_all_comb_b3_expo = 0.d0 @@ -296,8 +298,8 @@ END_PROVIDER do i = 1, nucl_num ii = ii + 1 - List_all_comb_b3_coef( ii) = -2.d0 - List_all_comb_b3_expo( ii) = j1b_pen( i) + List_all_comb_b3_coef( ii) = -2.d0 * j1b_pen_coef(i) + List_all_comb_b3_expo( ii) = j1b_pen(i) List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) @@ -305,7 +307,7 @@ END_PROVIDER do i = 1, nucl_num ii = ii + 1 - List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_coef( ii) = 1.d0 * j1b_pen_coef(i) * j1b_pen_coef(i) List_all_comb_b3_expo( ii) = 2.d0 * j1b_pen(i) List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) @@ -337,7 +339,7 @@ END_PROVIDER ii = ii + 1 ! x 2 to avoid doing integrals twice - List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) + List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) * j1b_pen_coef(i) * j1b_pen_coef(j) List_all_comb_b3_expo( ii) = tmp3 List_all_comb_b3_cent(1,ii) = tmp4 * (tmp1 * xi + tmp2 * xj) List_all_comb_b3_cent(2,ii) = tmp4 * (tmp1 * yi + tmp2 * yj) diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index 9b91a8ed..ac077fe0 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -35,7 +35,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] elseif(j1b_type .eq. 4) then - ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + ! v(r) = 1 - \sum_{a} \beta_a \exp(-\alpha_a (r - r_a)^2) do ipoint = 1, n_points_final_grid @@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] dz = z - nucl_coord(j,3) d = dx*dx + dy*dy + dz*dz - fact_r = fact_r - dexp(-a*d) + fact_r = fact_r - j1b_pen_coef(j) * dexp(-a*d) enddo v_1b(ipoint) = fact_r @@ -125,7 +125,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] elseif(j1b_type .eq. 4) then - ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + ! v(r) = 1 - \sum_{a} \beta_a \exp(-\alpha_a (r - r_a)^2) do ipoint = 1, n_points_final_grid @@ -144,7 +144,7 @@ BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] r2 = dx*dx + dy*dy + dz*dz a = j1b_pen(j) - e = a * dexp(-a * r2) + e = a * j1b_pen_coef(j) * dexp(-a * r2) ax_der += e * dx ay_der += e * dy diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 5e99600e..bd7ff6b7 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -296,7 +296,7 @@ double precision function j1b_nucl(r) d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - dexp(-a*d) + j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) enddo elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then @@ -363,7 +363,7 @@ double precision function j1b_nucl_square(r) d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) enddo j1b_nucl_square = j1b_nucl_square * j1b_nucl_square @@ -475,7 +475,7 @@ subroutine grad1_j1b_nucl(r, grad) y = r(2) - nucl_coord(i,2) z = r(3) - nucl_coord(i,3) d = x*x + y*y + z*z - e = a * dexp(-a*d) + e = a * j1b_pen_coef(i) * dexp(-a*d) fact_x += e * x fact_y += e * y diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a69f5bac..ea1503c3 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -130,6 +130,12 @@ doc: exponents of the 1-body Jastrow interface: ezfio size: (nuclei.nucl_num) +[j1b_pen_coef] +type: double precision +doc: coefficients of the 1-body Jastrow +interface: ezfio +size: (nuclei.nucl_num) + [j1b_coeff] type: double precision doc: coeff of the 1-body Jastrow diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 57250b52..3f1eb8ac 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -1,17 +1,22 @@ ! --- -BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] + BEGIN_PROVIDER [ double precision, j1b_pen , (nucl_num) ] +&BEGIN_PROVIDER [ double precision, j1b_pen_coef, (nucl_num) ] BEGIN_DOC - ! exponents of the 1-body Jastrow + ! parameters of the 1-body Jastrow END_DOC implicit none logical :: exists + integer :: i + integer :: ierr PROVIDE ezfio_filename + ! --- + if (mpi_master) then call ezfio_has_tc_keywords_j1b_pen(exists) endif @@ -23,7 +28,6 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] IRP_IF MPI include 'mpif.h' - integer :: ierr call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then stop 'Unable to read j1b_pen with MPI' @@ -31,7 +35,6 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] IRP_ENDIF if (exists) then - if (mpi_master) then write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen ] <<<<< ..' call ezfio_get_tc_keywords_j1b_pen(j1b_pen) @@ -42,19 +45,55 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ] endif IRP_ENDIF endif - else - - integer :: i do i = 1, nucl_num j1b_pen(i) = 1d5 enddo - endif - print*,'parameters for nuclei jastrow' - do i = 1, nucl_num - print*,'i,Z,j1b_pen(i)',i,nucl_charge(i),j1b_pen(i) - enddo + + ! --- + + if (mpi_master) then + call ezfio_has_tc_keywords_j1b_pen_coef(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + include 'mpif.h' + call MPI_BCAST(j1b_pen_coef, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read j1b_pen_coef with MPI' + endif + IRP_ENDIF + + if (exists) then + if (mpi_master) then + write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen_coef ] <<<<< ..' + call ezfio_get_tc_keywords_j1b_pen_coef(j1b_pen_coef) + IRP_IF MPI + call MPI_BCAST(j1b_pen_coef, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read j1b_pen_coef with MPI' + endif + IRP_ENDIF + endif + else + do i = 1, nucl_num + j1b_pen_coef(i) = 1d0 + enddo + endif + + ! --- + + print *, ' parameters for nuclei jastrow' + print *, ' i, Z, j1b_pen, j1b_pen_coef' + do i = 1, nucl_num + print *, i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) + enddo END_PROVIDER @@ -114,3 +153,4 @@ BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ] END_PROVIDER ! --- + From 8c2f6c9485d995f82b5fbfa37e58baa77cef8b6c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Jun 2023 23:57:39 +0200 Subject: [PATCH 184/337] minor modif --- src/tc_keywords/j1b_pen.irp.f | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 3f1eb8ac..ebcd5107 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -63,7 +63,6 @@ IRP_ENDIF IRP_IF MPI - include 'mpif.h' call MPI_BCAST(j1b_pen_coef, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) if (ierr /= MPI_SUCCESS) then stop 'Unable to read j1b_pen_coef with MPI' From 466741259e706f90d394bd93398e938ba0691686 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 21 Jun 2023 10:44:37 +0200 Subject: [PATCH 185/337] minor modif in non-sym Dav --- .../dav_diag_dressed_ext_rout_nonsym_B1space.irp.f | 2 +- src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f | 2 +- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f index 670b2395..1a8269f4 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f @@ -343,7 +343,7 @@ subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dre if(lambda_tmp .lt. 0.7d0) then print *, ' very small overlap ...', l, i_omax(l) print *, ' max overlap = ', lambda_tmp - stop + !stop endif if(i_omax(l) .ne. l) then diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f index 1bed60fe..4b7b9cc9 100644 --- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f +++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f @@ -342,7 +342,7 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N if(lambda_tmp .lt. 0.7d0) then print *, ' very small overlap ...', l, i_omax(l) print *, ' max overlap = ', lambda_tmp - stop + !stop endif if(i_omax(l) .ne. l) then diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index 3e89bbe2..19df847a 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -425,7 +425,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N if(lambda_tmp .lt. 0.7d0) then print *, ' very small overlap ...', l, i_omax(l) print *, ' max overlap = ', lambda_tmp - stop + !stop endif if(i_omax(l) .ne. l) then From c3d257c7aceff4c8641fc751cf9996783c1ce96f Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 22 Jun 2023 11:58:32 +0200 Subject: [PATCH 186/337] added routines to rotate orbitals without touching core orbitals --- src/tc_bi_ortho/tc_natorb.irp.f | 13 +- src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/routines_rotates.irp.f | 16 +- src/utils/block_diag_degen_core.irp.f | 244 ++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 8 deletions(-) create mode 100644 src/utils/block_diag_degen_core.irp.f diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index b7e5ae81..1b5a66f3 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -23,7 +23,7 @@ dm_tmp(1:mo_num,1:mo_num) = -tc_transition_matrix_mo(1:mo_num,1:mo_num,1,1) - print *, ' dm_tmp' + print *, ' Transition density matrix ' do i = 1, mo_num fock_diag(i) = fock_matrix_tc_mo_tot(i,i) write(*, '(100(F16.10,X))') -dm_tmp(:,i) @@ -32,8 +32,15 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + if(n_core_orb.ne.0)then +! print*,'core orbitals' +! pause + call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + else + call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) + endif ! call non_hrmt_bieig( mo_num, dm_tmp& ! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& ! , mo_num, natorb_tc_eigval ) diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a69f5bac..f984d53a 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -208,6 +208,12 @@ doc: Threshold to determine if diagonal elements of the bi-orthogonal condition interface: ezfio,provider,ocaml default: 1.e-6 +[thresh_lr_angle] +type: double precision +doc: Maximum value of the angle between the couple of left and right orbital for the rotations +interface: ezfio,provider,ocaml +default: 20.0 + [thresh_biorthog_nondiag] type: Threshold doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0 diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 755c35b9..588382b5 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -140,7 +140,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! compute the overlap between the left and rescaled right call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + if(n_core_orb.ne.0)then + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + else + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + endif print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -152,6 +156,8 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) + if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .eq. 1) cycle allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) @@ -279,7 +285,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) - good_angles = max_angle.lt.45.d0 + good_angles = max_angle.lt.thresh_lr_angle print *, ' max_angle = ', max_angle deallocate(new_angles) @@ -397,11 +403,11 @@ subroutine print_energy_and_mos(good_angles) print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right - if(max_angle_left_right .lt. 45.d0) then + if(max_angle_left_right .lt. thresh_lr_angle) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' good_angles = .true. - else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then - print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. thresh_lr_angle .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between thresh_lr_angle and 75 degrees, this is not the best for TC-CI calculations ...' good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' diff --git a/src/utils/block_diag_degen_core.irp.f b/src/utils/block_diag_degen_core.irp.f new file mode 100644 index 00000000..5d46bd87 --- /dev/null +++ b/src/utils/block_diag_degen_core.irp.f @@ -0,0 +1,244 @@ + +subroutine diag_mat_per_fock_degen_core(fock_diag, mat_ref, listcore,ncore, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) + + + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! the elements of listcore are untouched + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC + + implicit none + integer, intent(in) :: n,ncore, listcore(ncore) + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_listcore(fock_diag, n, listcore, ncore, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i + enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + + if(n_degen.ge.1000)then + print*,'core orbital ' + else + print *, ' diagonalizing for n_degen = ', n_degen + endif + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + if(n_degen.ge.1000)then + n_degen = 1 + endif + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) + enddo + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif + + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i + enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + +end + +! --- + +subroutine give_degen_full_listcore(A, n, listcore, ncore, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + ! + ! if list_degen(i,0) >= 1000 it means that it is core orbitals + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n,ncore, listcore(ncore) + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck,k + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + ! you first exclude the "core" orbitals + do i = 1, ncore + j=listcore(i) + is_ok(j) = .False. + enddo + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount + enddo + ! you set all the core orbitals as separate entities + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + if(icheck.ne.(n-ncore))then + print *, ' pb ! :: icheck.ne.n-ncore' + print *, icheck, n-ncore + stop + endif + k=1000 + do i = 1, ncore + n_degen_list+= 1 + j=listcore(i) + list_degen(n_degen_list,1) = i + list_degen(n_degen_list,0) = k + k+=1 + enddo + + + +end + +! --- + From 6881a65994fe04eebcfedde0871e34d7737b9b8c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 22 Jun 2023 13:34:36 +0200 Subject: [PATCH 187/337] Fix possible float_of_string: 0.160099927795302-102 error --- src/utils/linear_algebra.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 76a539a6..65c57a76 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1565,7 +1565,7 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) ! Remove tiny elements do j=1,n do i=1,m - if ( dabs(A(i,j) * amax) < thresh ) then + if ( (dabs(A(i,j) * amax) < thresh).or.(dabs(A(i,j)) < 1.d-99) ) then A(i,j) = 0.d0 endif enddo From 5a5071f248ca55516f37b9f09f01f279c7c6faea Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Jun 2023 18:26:52 +0200 Subject: [PATCH 188/337] fixed bug in nucl_aos --- src/ao_basis/aos_in_r.irp.f | 84 ++++--- src/ao_basis/aos_transp.irp.f | 38 +-- src/bi_ort_ints/one_e_bi_ort.irp.f | 2 +- src/bi_ortho_mos/mos_rl.irp.f | 2 + src/dft_utils_in_r/ao_in_r.irp.f | 81 ++++--- src/non_h_ints_mu/total_tc_int.irp.f | 3 + src/tc_bi_ortho/tc_utils.irp.f | 13 +- src/tc_scf/fock_three_hermit.irp.f | 52 +++-- src/tc_scf/molden_lr_mos.irp.f | 332 ++++++++++++++++++++++++++- 9 files changed, 490 insertions(+), 117 deletions(-) diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 7fcb980a..1b1595a3 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -65,46 +65,60 @@ double precision function primitive_value(i,j,r) end +! --- -subroutine give_all_aos_at_r(r,aos_array) - implicit none - BEGIN_dOC -! input : r == r(1) = x and so on -! -! output : aos_array(i) = aos(i) evaluated in $\textbf{r}$ - END_DOC - double precision, intent(in) :: r(3) - double precision, intent(out):: aos_array(ao_num) +subroutine give_all_aos_at_r(r, tmp_array) - integer :: power_ao(3) - integer :: i,j,k,l,m - double precision :: dx,dy,dz,r2 - double precision :: dx2,dy2,dz2 - double precision :: center_ao(3) - double precision :: beta - do i = 1, nucl_num - center_ao(1:3) = nucl_coord(i,1:3) - dx = (r(1) - center_ao(1)) - dy = (r(2) - center_ao(2)) - dz = (r(3) - center_ao(3)) - r2 = dx*dx + dy*dy + dz*dz - do j = 1,Nucl_N_Aos(i) - k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format - aos_array(k) = 0.d0 - power_ao(1:3)= ao_power_ordered_transp_per_nucl(1:3,j,i) - dx2 = dx**power_ao(1) - dy2 = dy**power_ao(2) - dz2 = dz**power_ao(3) - do l = 1,ao_prim_num(k) - beta = ao_expo_ordered_transp_per_nucl(l,j,i) - if(dabs(beta*r2).gt.40.d0)cycle - aos_array(k)+= ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) - enddo - aos_array(k) = aos_array(k) * dx2 * dy2 * dz2 + BEGIN_dOC + ! + ! input : r == r(1) = x and so on + ! + ! output : tmp_array(i) = aos(i) evaluated in $\textbf{r}$ + ! + END_DOC + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: tmp_array(ao_num) + integer :: p_ao(3) + integer :: i, j, k, l, m + double precision :: dx, dy, dz, r2 + double precision :: dx2, dy2, dz2 + double precision :: c_ao(3) + double precision :: beta + + do i = 1, nucl_num + + c_ao(1:3) = nucl_coord(i,1:3) + dx = r(1) - c_ao(1) + dy = r(2) - c_ao(2) + dz = r(3) - c_ao(3) + r2 = dx*dx + dy*dy + dz*dz + + do j = 1, Nucl_N_Aos(i) + + k = Nucl_Aos_transposed(j,i) ! index of the ao in the ordered format + p_ao(1:3) = ao_power_ordered_transp_per_nucl(1:3,j,i) + dx2 = dx**p_ao(1) + dy2 = dy**p_ao(2) + dz2 = dz**p_ao(3) + + tmp_array(k) = 0.d0 + do l = 1,ao_prim_num(k) + beta = ao_expo_ordered_transp_per_nucl(l,j,i) + if(dabs(beta*r2).gt.40.d0) cycle + + tmp_array(k) += ao_coef_normalized_ordered_transp_per_nucl(l,j,i) * dexp(-beta*r2) + enddo + + tmp_array(k) = tmp_array(k) * dx2 * dy2 * dz2 + enddo enddo - enddo + + return end +! --- subroutine give_all_aos_and_grad_at_r(r,aos_array,aos_grad_array) implicit none diff --git a/src/ao_basis/aos_transp.irp.f b/src/ao_basis/aos_transp.irp.f index ae6193bf..4e44a9f6 100644 --- a/src/ao_basis/aos_transp.irp.f +++ b/src/ao_basis/aos_transp.irp.f @@ -1,20 +1,28 @@ - BEGIN_PROVIDER [ integer, Nucl_Aos_transposed, (N_AOs_max,nucl_num)] - implicit none - BEGIN_DOC - ! List of AOs attached on each atom - END_DOC - integer :: i - integer, allocatable :: nucl_tmp(:) - allocate(nucl_tmp(nucl_num)) - nucl_tmp = 0 - Nucl_Aos = 0 - do i = 1, ao_num - nucl_tmp(ao_nucl(i))+=1 - Nucl_Aos_transposed(nucl_tmp(ao_nucl(i)),ao_nucl(i)) = i - enddo - deallocate(nucl_tmp) + +! --- + +BEGIN_PROVIDER [ integer, Nucl_Aos_transposed, (N_AOs_max,nucl_num)] + + BEGIN_DOC + ! List of AOs attached on each atom + END_DOC + + implicit none + integer :: i + integer, allocatable :: nucl_tmp(:) + + allocate(nucl_tmp(nucl_num)) + nucl_tmp = 0 + do i = 1, ao_num + nucl_tmp(ao_nucl(i)) += 1 + Nucl_Aos_transposed(nucl_tmp(ao_nucl(i)),ao_nucl(i)) = i + enddo + deallocate(nucl_tmp) + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_expo_ordered_transp_per_nucl, (ao_prim_num_max,N_AOs_max,nucl_num) ] implicit none integer :: i,j,k,l diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 5f2795f1..49181182 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -6,7 +6,7 @@ BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)] implicit none integer :: i, j - ao_one_e_integrals_tc_tot = ao_one_e_integrals + ao_one_e_integrals_tc_tot = ao_one_e_integrals !provide j1b_type diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f index c69309d1..13eedfb7 100644 --- a/src/bi_ortho_mos/mos_rl.irp.f +++ b/src/bi_ortho_mos/mos_rl.irp.f @@ -136,6 +136,7 @@ BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ] mo_r_coef(j,i) = mo_coef(j,i) enddo enddo + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) endif END_PROVIDER @@ -191,6 +192,7 @@ BEGIN_PROVIDER [ double precision, mo_l_coef, (ao_num, mo_num) ] mo_l_coef(j,i) = mo_coef(j,i) enddo enddo + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) endif END_PROVIDER diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f index b8beea76..16414f39 100644 --- a/src/dft_utils_in_r/ao_in_r.irp.f +++ b/src/dft_utils_in_r/ao_in_r.irp.f @@ -1,53 +1,64 @@ - BEGIN_PROVIDER[double precision, aos_in_r_array, (ao_num,n_points_final_grid)] - implicit none - BEGIN_DOC - ! aos_in_r_array(i,j) = value of the ith ao on the jth grid point - END_DOC - integer :: i,j - double precision :: aos_array(ao_num), r(3) - !$OMP PARALLEL DO & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,r,aos_array,j) & - !$OMP SHARED(aos_in_r_array,n_points_final_grid,ao_num,final_grid_points) - do i = 1, n_points_final_grid - r(1) = final_grid_points(1,i) - r(2) = final_grid_points(2,i) - r(3) = final_grid_points(3,i) - call give_all_aos_at_r(r,aos_array) - do j = 1, ao_num - aos_in_r_array(j,i) = aos_array(j) +! --- + +BEGIN_PROVIDER[double precision, aos_in_r_array, (ao_num,n_points_final_grid)] + + BEGIN_DOC + ! aos_in_r_array(i,j) = value of the ith ao on the jth grid point + END_DOC + + implicit none + integer :: i, j + double precision :: tmp_array(ao_num), r(3) + + !$OMP PARALLEL DO & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,r,tmp_array,j) & + !$OMP SHARED(aos_in_r_array,n_points_final_grid,ao_num,final_grid_points) + do i = 1, n_points_final_grid + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + call give_all_aos_at_r(r, tmp_array) + do j = 1, ao_num + aos_in_r_array(j,i) = tmp_array(j) + enddo enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO - END_PROVIDER +END_PROVIDER +! --- - BEGIN_PROVIDER[double precision, aos_in_r_array_transp, (n_points_final_grid,ao_num)] - implicit none - BEGIN_DOC - ! aos_in_r_array_transp(i,j) = value of the jth ao on the ith grid point - END_DOC - integer :: i,j - double precision :: aos_array(ao_num), r(3) - do i = 1, n_points_final_grid - do j = 1, ao_num - aos_in_r_array_transp(i,j) = aos_in_r_array(j,i) +BEGIN_PROVIDER[double precision, aos_in_r_array_transp, (n_points_final_grid,ao_num)] + + BEGIN_DOC + ! aos_in_r_array_transp(i,j) = value of the jth ao on the ith grid point + END_DOC + + implicit none + integer :: i, j + double precision :: aos_array(ao_num), r(3) + + do i = 1, n_points_final_grid + do j = 1, ao_num + aos_in_r_array_transp(i,j) = aos_in_r_array(j,i) + enddo enddo - enddo - END_PROVIDER +END_PROVIDER +! --- +BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)] - BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)] - implicit none BEGIN_DOC ! aos_grad_in_r_array(i,j,k) = value of the kth component of the gradient of ith ao on the jth grid point ! ! k = 1 : x, k= 2, y, k 3, z END_DOC + + implicit none integer :: i,j,m double precision :: aos_array(ao_num), r(3) double precision :: aos_grad_array(3,ao_num) diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index afa10305..2bdf39f0 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -1,4 +1,7 @@ +! TODO +! remove ao_two_e_coul and use map directly + ! --- BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, ao_num)] diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 9023e2f0..53fe5884 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -5,27 +5,34 @@ subroutine write_tc_energy() integer :: i, j, k double precision :: hmono, htwoe, hthree, htot double precision :: E_TC, O_TC + double precision :: E_1e, E_2e, E_3e do k = 1, n_states E_TC = 0.d0 + E_1e = 0.d0 + E_2e = 0.d0 + E_3e = 0.d0 do i = 1, N_det do j = 1, N_det - !htot = htilde_matrix_elmt_bi_ortho(i,j) call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot - !E_TC = E_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(j,k) * htot + E_1e = E_1e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * hmono + E_2e = E_2e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htwoe + E_3e = E_3e + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * hthree enddo enddo O_TC = 0.d0 do i = 1, N_det - !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) enddo print *, ' state :', k print *, " E_TC = ", E_TC / O_TC + print *, " E_1e = ", E_1e / O_TC + print *, " E_2e = ", E_2e / O_TC + print *, " E_3e = ", E_3e / O_TC print *, " O_TC = ", O_TC enddo diff --git a/src/tc_scf/fock_three_hermit.irp.f b/src/tc_scf/fock_three_hermit.irp.f index fe8fbfd7..89e6f620 100644 --- a/src/tc_scf/fock_three_hermit.irp.f +++ b/src/tc_scf/fock_three_hermit.irp.f @@ -1,30 +1,36 @@ + +! --- + BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] - implicit none + + implicit none integer :: i,j double precision :: contrib + fock_3_mat = 0.d0 - if(.not.bi_ortho.and.three_body_h_tc)then - call give_fock_ia_three_e_total(1,1,contrib) -!! !$OMP PARALLEL & -!! !$OMP DEFAULT (NONE) & -!! !$OMP PRIVATE (i,j,m,integral) & -!! !$OMP SHARED (mo_num,three_body_3_index) -!! !$OMP DO SCHEDULE (guided) COLLAPSE(3) - do i = 1, mo_num - do j = 1, mo_num - call give_fock_ia_three_e_total(j,i,contrib) - fock_3_mat(j,i) = -contrib - enddo - enddo - else if(bi_ortho.and.three_body_h_tc)then -!! !$OMP END DO -!! !$OMP END PARALLEL -!! do i = 1, mo_num -!! do j = 1, i-1 -!! mat_three(j,i) = mat_three(i,j) -!! enddo -!! enddo - endif + if(.not.bi_ortho .and. three_body_h_tc) then + + call give_fock_ia_three_e_total(1, 1, contrib) + !! !$OMP PARALLEL & + !! !$OMP DEFAULT (NONE) & + !! !$OMP PRIVATE (i,j,m,integral) & + !! !$OMP SHARED (mo_num,three_body_3_index) + !! !$OMP DO SCHEDULE (guided) COLLAPSE(3) + do i = 1, mo_num + do j = 1, mo_num + call give_fock_ia_three_e_total(j,i,contrib) + fock_3_mat(j,i) = -contrib + enddo + enddo + !else if(bi_ortho.and.three_body_h_tc) then + !! !$OMP END DO + !! !$OMP END PARALLEL + !! do i = 1, mo_num + !! do j = 1, i-1 + !! mat_three(j,i) = mat_three(i,j) + !! enddo + !! enddo + endif END_PROVIDER diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index 735349ba..e12fcd1c 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -1,6 +1,9 @@ -program molden +! --- + +program molden_lr_mos + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC implicit none @@ -14,13 +17,21 @@ program molden ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call molden_lr + !call molden_lr + call molden_l() + call molden_r() + end + +! --- + subroutine molden_lr - implicit none + BEGIN_DOC ! Produces a Molden file END_DOC + + implicit none character*(128) :: output integer :: i_unit_output,getUnitAndOpen integer :: i,j,k,l @@ -37,7 +48,7 @@ subroutine molden_lr write(i_unit_output,'(A)') '[Atoms] Angs' do i = 1, nucl_num - write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & + write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & trim(element_name(int(nucl_charge(i)))), & i, & int(nucl_charge(i)), & @@ -174,3 +185,314 @@ subroutine molden_lr close(i_unit_output) end +! --- + +subroutine molden_l() + + BEGIN_DOC + ! Produces a Molden file + END_DOC + + implicit none + character*(128) :: output + integer :: i_unit_output, getUnitAndOpen + integer :: i, j, k, l + double precision, parameter :: a0 = 0.529177249d0 + + PROVIDE ezfio_filename + PROVIDE mo_l_coef + + output=trim(ezfio_filename)//'_left.mol' + print*,'output = ',trim(output) + + i_unit_output = getUnitAndOpen(output,'w') + + write(i_unit_output,'(A)') '[Molden Format]' + + write(i_unit_output,'(A)') '[Atoms] Angs' + do i = 1, nucl_num + write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & + trim(element_name(int(nucl_charge(i)))), & + i, & + int(nucl_charge(i)), & + nucl_coord(i,1)*a0, nucl_coord(i,2)*a0, nucl_coord(i,3)*a0 + enddo + + write(i_unit_output,'(A)') '[GTO]' + + character*(1) :: character_shell + integer :: i_shell,i_prim,i_ao + integer :: iorder(ao_num) + integer :: nsort(ao_num) + + i_shell = 0 + i_prim = 0 + do i=1,nucl_num + write(i_unit_output,*) i, 0 + do j=1,nucl_num_shell_aos(i) + i_shell +=1 + i_ao = nucl_list_shell_aos(i,j) + character_shell = trim(ao_l_char(i_ao)) + write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' + do k = 1, ao_prim_num(i_ao) + i_prim +=1 + write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + enddo + l = i_ao + do while ( ao_l(l) == ao_l(i_ao) ) + nsort(l) = i*10000 + j*100 + l += 1 + if (l > ao_num) exit + enddo + enddo + write(i_unit_output,*)'' + enddo + + + do i=1,ao_num + iorder(i) = i + ! p + if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 3 + ! d + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + ! f + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 10 + ! g + else if ((ao_power(i,1) == 4 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 4 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 4 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 10 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 11 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 12 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 13 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 14 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 15 + endif + enddo + + call isort(nsort,iorder,ao_num) + write(i_unit_output,'(A)') '[MO]' + do i=1,mo_num + write (i_unit_output,*) 'Sym= 1' + write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i) + write (i_unit_output,*) 'Spin= Alpha' + write (i_unit_output,*) 'Occup=', mo_occ(i) + do j=1,ao_num + write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + enddo + enddo + close(i_unit_output) + +end + +! --- + +subroutine molden_r() + + BEGIN_DOC + ! Produces a Molden file + END_DOC + + implicit none + character*(128) :: output + integer :: i_unit_output, getUnitAndOpen + integer :: i, j, k, l + double precision, parameter :: a0 = 0.529177249d0 + + PROVIDE ezfio_filename + + output=trim(ezfio_filename)//'_right.mol' + print*,'output = ',trim(output) + + i_unit_output = getUnitAndOpen(output,'w') + + write(i_unit_output,'(A)') '[Molden Format]' + + write(i_unit_output,'(A)') '[Atoms] Angs' + do i = 1, nucl_num + write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & + trim(element_name(int(nucl_charge(i)))), & + i, & + int(nucl_charge(i)), & + nucl_coord(i,1)*a0, nucl_coord(i,2)*a0, nucl_coord(i,3)*a0 + enddo + + write(i_unit_output,'(A)') '[GTO]' + + character*(1) :: character_shell + integer :: i_shell,i_prim,i_ao + integer :: iorder(ao_num) + integer :: nsort(ao_num) + + i_shell = 0 + i_prim = 0 + do i=1,nucl_num + write(i_unit_output,*) i, 0 + do j=1,nucl_num_shell_aos(i) + i_shell +=1 + i_ao = nucl_list_shell_aos(i,j) + character_shell = trim(ao_l_char(i_ao)) + write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' + do k = 1, ao_prim_num(i_ao) + i_prim +=1 + write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + enddo + l = i_ao + do while ( ao_l(l) == ao_l(i_ao) ) + nsort(l) = i*10000 + j*100 + l += 1 + if (l > ao_num) exit + enddo + enddo + write(i_unit_output,*)'' + enddo + + + do i=1,ao_num + iorder(i) = i + ! p + if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 3 + ! d + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + ! f + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 10 + ! g + else if ((ao_power(i,1) == 4 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 4 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 4 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 10 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 11 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 12 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 13 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 14 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 15 + endif + enddo + + call isort(nsort, iorder, ao_num) + write(i_unit_output,'(A)') '[MO]' + do i=1,mo_num + write (i_unit_output,*) 'Sym= 1' + write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i) + write (i_unit_output,*) 'Spin= Alpha' + write (i_unit_output,*) 'Occup=', mo_occ(i) + do j=1,ao_num + write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + enddo + enddo + close(i_unit_output) + +end + From 3940eaeb787cdd32dc7259deda782c96c948f183 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Jun 2023 22:04:57 +0200 Subject: [PATCH 189/337] BI-AO --> BI-MO with DGEMM --- src/bi_ort_ints/total_twoe_pot.irp.f | 150 +++++++++++++++++---------- src/non_h_ints_mu/total_tc_int.irp.f | 1 + src/tc_bi_ortho/print_tc_dump.irp.f | 44 ++++++++ 3 files changed, 138 insertions(+), 57 deletions(-) create mode 100644 src/tc_bi_ortho/print_tc_dump.irp.f diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 721ea0c8..c1bacbd0 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -80,6 +80,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n enddo enddo + FREE ao_tc_int_chemist + endif END_PROVIDER @@ -128,69 +130,99 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, implicit none integer :: i, j, k, l, m, n, p, q - double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:) + double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) - allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num)) - mo_tmp_1 = 0.d0 + PROVIDE mo_r_coef mo_l_coef - do m = 1, ao_num - do p = 1, ao_num - do n = 1, ao_num - do q = 1, ao_num - do k = 1, mo_num - ! (k n|p m) = sum_q c_qk * (q n|p m) - mo_tmp_1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_tc_tot(q,n,p,m) - enddo - enddo - enddo - enddo - enddo + allocate(a2(ao_num,ao_num,ao_num,mo_num)) - allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num)) - mo_tmp_2 = 0.d0 + call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_tc_tot(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num) - do m = 1, ao_num - do p = 1, ao_num - do n = 1, ao_num - do i = 1, mo_num - do k = 1, mo_num - ! (k i|p m) = sum_n c_ni * (k n|p m) - mo_tmp_2(k,i,p,m) += mo_r_coef_transp(i,n) * mo_tmp_1(k,n,p,m) - enddo - enddo - enddo - enddo - enddo - deallocate(mo_tmp_1) + allocate(a1(ao_num,ao_num,mo_num,mo_num)) - allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num)) - mo_tmp_1 = 0.d0 - do m = 1, ao_num - do p = 1, ao_num - do l = 1, mo_num - do i = 1, mo_num - do k = 1, mo_num - mo_tmp_1(k,i,l,m) += mo_l_coef_transp(l,p) * mo_tmp_2(k,i,p,m) - enddo - enddo - enddo - enddo - enddo - deallocate(mo_tmp_2) + call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) - mo_bi_ortho_tc_two_e_chemist = 0.d0 - do m = 1, ao_num - do j = 1, mo_num - do l = 1, mo_num - do i = 1, mo_num - do k = 1, mo_num - mo_bi_ortho_tc_two_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * mo_tmp_1(k,i,l,m) - enddo - enddo - enddo - enddo - enddo - deallocate(mo_tmp_1) + deallocate(a2) + allocate(a2(ao_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + + deallocate(a1) + + call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, mo_bi_ortho_tc_two_e_chemist(1,1,1,1), mo_num*mo_num*mo_num) + + deallocate(a2) + + + !allocate(a1(mo_num,ao_num,ao_num,ao_num)) + !a1 = 0.d0 + + !do m = 1, ao_num + ! do p = 1, ao_num + ! do n = 1, ao_num + ! do q = 1, ao_num + ! do k = 1, mo_num + ! ! (k n|p m) = sum_q c_qk * (q n|p m) + ! a1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_tc_tot(q,n,p,m) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + + !allocate(a2(mo_num,mo_num,ao_num,ao_num)) + !a2 = 0.d0 + + !do m = 1, ao_num + ! do p = 1, ao_num + ! do n = 1, ao_num + ! do i = 1, mo_num + ! do k = 1, mo_num + ! ! (k i|p m) = sum_n c_ni * (k n|p m) + ! a2(k,i,p,m) += mo_r_coef_transp(i,n) * a1(k,n,p,m) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + !deallocate(a1) + + !allocate(a1(mo_num,mo_num,mo_num,ao_num)) + !a1 = 0.d0 + !do m = 1, ao_num + ! do p = 1, ao_num + ! do l = 1, mo_num + ! do i = 1, mo_num + ! do k = 1, mo_num + ! a1(k,i,l,m) += mo_l_coef_transp(l,p) * a2(k,i,p,m) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + !deallocate(a2) + + !mo_bi_ortho_tc_two_e_chemist = 0.d0 + !do m = 1, ao_num + ! do j = 1, mo_num + ! do l = 1, mo_num + ! do i = 1, mo_num + ! do k = 1, mo_num + ! mo_bi_ortho_tc_two_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * a1(k,i,l,m) + ! enddo + ! enddo + ! enddo + ! enddo + !enddo + !deallocate(a1) END_PROVIDER @@ -209,6 +241,8 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, implicit none integer :: i, j, k, l + PROVIDE mo_bi_ortho_tc_two_e_chemist + do j = 1, mo_num do i = 1, mo_num do l = 1, mo_num @@ -220,6 +254,8 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, enddo enddo + FREE mo_bi_ortho_tc_two_e_chemist + END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2bdf39f0..158ee2fb 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -119,6 +119,7 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, a call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f new file mode 100644 index 00000000..327e0f02 --- /dev/null +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -0,0 +1,44 @@ +program tc_bi_ortho + + BEGIN_DOC + ! TODO + END_DOC + implicit none + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call KMat_tilde_dump() +end + +! --- + +subroutine KMat_tilde_dump() + + implicit none + integer :: i, j, k, l + + PROVIDE mo_bi_ortho_tc_two_e_chemist + + print *, ' Kmat_tilde in chem notation' + + open(33, file='Kmat_tilde.dat', action='write') + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, mo_bi_ortho_tc_two_e_chemist(i,j,k,l) + ! TCHint convention + !write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, mo_bi_ortho_tc_two_e_chemist(j,i,l,k) + enddo + enddo + enddo + enddo + close(33) + + return +end subroutine KMat_tilde_dump + +! --- From b88437b6b32a8c88d19b045740eab5f5ed433953 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 24 Jun 2023 18:54:32 +0200 Subject: [PATCH 190/337] minor modif --- src/tc_bi_ortho/print_tc_dump.irp.f | 93 ++++++++++++++++++++++++++- src/tc_bi_ortho/print_tc_energy.irp.f | 24 +++++-- src/tc_bi_ortho/slater_tc_slow.irp.f | 2 +- src/tc_keywords/j1b_pen.irp.f | 2 +- src/tc_scf/fock_three_hermit.irp.f | 3 +- 5 files changed, 114 insertions(+), 10 deletions(-) diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 327e0f02..55df20a2 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -10,7 +10,10 @@ program tc_bi_ortho my_n_pt_a_grid = 50 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call ERI_dump() call KMat_tilde_dump() + call LMat_tilde_dump() + end ! --- @@ -18,7 +21,7 @@ end subroutine KMat_tilde_dump() implicit none - integer :: i, j, k, l + integer :: i, j, k, l PROVIDE mo_bi_ortho_tc_two_e_chemist @@ -42,3 +45,91 @@ subroutine KMat_tilde_dump() end subroutine KMat_tilde_dump ! --- + +subroutine ERI_dump() + + implicit none + integer :: i, j, k, l + double precision, allocatable :: a1(:,:,:,:), a2(:,:,:,:) + + PROVIDE mo_r_coef mo_l_coef + + allocate(a2(ao_num,ao_num,ao_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*ao_num, mo_num, ao_num, 1.d0 & + , ao_two_e_coul(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0 , a2(1,1,1,1), ao_num*ao_num*ao_num) + + allocate(a1(ao_num,ao_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*ao_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), ao_num*ao_num*mo_num) + + deallocate(a2) + allocate(a2(ao_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', ao_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a1(1,1,1,1), ao_num, mo_l_coef(1,1), ao_num & + , 0.d0, a2(1,1,1,1), ao_num*mo_num*mo_num) + + deallocate(a1) + allocate(a1(mo_num,mo_num,mo_num,mo_num)) + + call dgemm( 'T', 'N', mo_num*mo_num*mo_num, mo_num, ao_num, 1.d0 & + , a2(1,1,1,1), ao_num, mo_r_coef(1,1), ao_num & + , 0.d0, a1(1,1,1,1), mo_num*mo_num*mo_num) + + deallocate(a2) + + open(33, file='ERI.dat', action='write') + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + enddo + enddo + enddo + enddo + close(33) + + deallocate(a1) + + return +end subroutine ERI_dump + +! --- + +subroutine LMat_tilde_dump() + + implicit none + integer :: i, j, k, l, m, n + double precision :: integral + + PROVIDE mo_bi_ortho_tc_two_e_chemist + + print *, ' Lmat_tilde in phys notation' + + open(33, file='Lmat_tilde.dat', action='write') + do n = 1, mo_num + do m = 1, mo_num + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + ! < i j k | -L | l m n > with a BI-ORTHONORMAL MOLECULAR ORBITALS + call give_integrals_3_body_bi_ort(i, j, k, l, m, n, integral) + write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral + enddo + enddo + enddo + enddo + enddo + enddo + close(33) + + return +end subroutine LMat_tilde_dump + +! --- diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index 980d12de..b9f23a8a 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -1,19 +1,31 @@ program print_tc_energy - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + !my_n_pt_r_grid = 30 + !my_n_pt_a_grid = 50 + + my_n_pt_r_grid = 100 + my_n_pt_a_grid = 170 + + !my_n_pt_r_grid = 100 + !my_n_pt_a_grid = 266 + read_wf = .True. touch read_wf PROVIDE j1b_type print*, 'j1b_type = ', j1b_type - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call write_tc_energy + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_tc_energy() + end diff --git a/src/tc_bi_ortho/slater_tc_slow.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f index 1833d20f..301cfe0f 100644 --- a/src/tc_bi_ortho/slater_tc_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -55,7 +55,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, hmono = 0.d0 htwoe = 0.d0 htot = 0.d0 - hthree = 0.D0 + hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index ebcd5107..2d5e59a9 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -91,7 +91,7 @@ print *, ' parameters for nuclei jastrow' print *, ' i, Z, j1b_pen, j1b_pen_coef' do i = 1, nucl_num - print *, i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) + write(*,"(I4, 2x, 3(E15.7, 2X))"), i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) enddo END_PROVIDER diff --git a/src/tc_scf/fock_three_hermit.irp.f b/src/tc_scf/fock_three_hermit.irp.f index 89e6f620..a936da9b 100644 --- a/src/tc_scf/fock_three_hermit.irp.f +++ b/src/tc_scf/fock_three_hermit.irp.f @@ -78,6 +78,7 @@ end ! --- +! TODO DGEMM BEGIN_PROVIDER [double precision, diag_three_elem_hf] implicit none @@ -106,7 +107,7 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf] do i = 1, elec_beta_num do j = 1, elec_beta_num do k = 1, elec_beta_num - call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231) + call give_integrals_3_body(k, j, i, j, i, k, exchange_int_231) diag_three_elem_hf += two_third * exchange_int_231 enddo enddo From b2393ba88db73506b0589a8003a20fb7720a8936 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 26 Jun 2023 15:59:30 +0200 Subject: [PATCH 191/337] fix typo --- src/utils_cc/occupancy.irp.f | 4 ++-- src/utils_cc/org/occupancy.org | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils_cc/occupancy.irp.f b/src/utils_cc/occupancy.irp.f index c6139bb3..27b0ee5e 100644 --- a/src/utils_cc/occupancy.irp.f +++ b/src/utils_cc/occupancy.irp.f @@ -204,8 +204,8 @@ function is_del(i) is_del = .False. ! Search - do j = 1, dim_list_core_orb - if (list_core(j) == i) then + do j = 1, dim_list_del_orb + if (list_del(j) == i) then is_del = .True. exit endif diff --git a/src/utils_cc/org/occupancy.org b/src/utils_cc/org/occupancy.org index 246bbd5b..4267fc88 100644 --- a/src/utils_cc/org/occupancy.org +++ b/src/utils_cc/org/occupancy.org @@ -209,8 +209,8 @@ function is_del(i) is_del = .False. ! Search - do j = 1, dim_list_core_orb - if (list_core(j) == i) then + do j = 1, dim_list_del_orb + if (list_del(j) == i) then is_del = .True. exit endif From ce82fc82524a342c570483d79dab9dea760045fd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 28 Jun 2023 13:50:21 +0200 Subject: [PATCH 192/337] Update EZFIO --- external/ezfio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/ezfio b/external/ezfio index d5805497..0520b5e2 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 From bd8218a87648e8c76bff73d1ff85c455ccac6f42 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 28 Jun 2023 18:57:41 +0200 Subject: [PATCH 193/337] DUMP for TCHint added --- src/non_h_ints_mu/debug_integ_jmu_modif.irp.f | 64 ++++++++++++- src/non_h_ints_mu/j12_nucl_utils.irp.f | 89 +++++++++++++++++-- src/non_h_ints_mu/jast_deriv.irp.f | 2 + src/tc_bi_ortho/print_tc_dump.irp.f | 71 +++++++++++++-- src/tc_bi_ortho/print_tc_energy.irp.f | 9 +- src/tc_scf/print_tcscf_energy.irp.f | 58 ++++++++++++ src/tc_scf/tc_scf.irp.f | 9 +- src/tc_scf/tcscf_energy_naive.irp.f | 80 +++++++++++++++++ src/tc_scf/three_e_energy_bi_ortho.irp.f | 77 +++++++++------- 9 files changed, 404 insertions(+), 55 deletions(-) create mode 100644 src/tc_scf/print_tcscf_energy.irp.f create mode 100644 src/tc_scf/tcscf_energy_naive.irp.f diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f index 5e7ef7e9..f3e93360 100644 --- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f +++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f @@ -27,12 +27,13 @@ program debug_integ_jmu_modif ! call test_int2_grad1_u12_ao() ! ! call test_grad12_j12() + call test_tchint_rsdft() ! call test_u12sq_j1bsq() ! call test_u12_grad1_u12_j1b_grad1_j1b() ! !call test_gradu_squared_u_ij_mu() !call test_vect_overlap_gauss_r12_ao() - call test_vect_overlap_gauss_r12_ao_with1s() + !call test_vect_overlap_gauss_r12_ao_with1s() end @@ -473,6 +474,65 @@ end subroutine test_gradu_squared_u_ij_mu ! --- +subroutine test_tchint_rsdft() + + implicit none + integer :: i, j, m, ipoint, jpoint + double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz + double precision :: x(3), y(3), dj_1(3), dj_2(3), dj_3(3) + + print*, ' test rsdft_jastrow ...' + + PROVIDE grad1_u12_num + + eps_ij = 1d-4 + acc_tot = 0.d0 + normalz = 0.d0 + + do ipoint = 1, n_points_final_grid + x(1) = final_grid_points(1,ipoint) + x(2) = final_grid_points(2,ipoint) + x(3) = final_grid_points(3,ipoint) + + do jpoint = 1, n_points_extra_final_grid + y(1) = final_grid_points_extra(1,jpoint) + y(2) = final_grid_points_extra(2,jpoint) + y(3) = final_grid_points_extra(3,jpoint) + + dj_1(1) = grad1_u12_num(jpoint,ipoint,1) + dj_1(2) = grad1_u12_num(jpoint,ipoint,2) + dj_1(3) = grad1_u12_num(jpoint,ipoint,3) + + call get_tchint_rsdft_jastrow(x, y, dj_2) + + do m = 1, 3 + i_exc = dj_1(m) + i_num = dj_2(m) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem on', ipoint, jpoint, m + print *, ' x = ', x + print *, ' y = ', y + print *, ' exc, num, diff = ', i_exc, i_num, acc_ij + call grad1_jmu_modif_num(x, y, dj_3) + print *, ' check = ', dj_3(m) + stop + endif + + acc_tot += acc_ij + normalz += dabs(i_exc) + enddo + enddo + enddo + + print*, ' acc_tot = ', acc_tot + print*, ' normalz = ', normalz + + return +end subroutine test_tchint_rsdft + +! --- + subroutine test_grad12_j12() implicit none @@ -484,7 +544,7 @@ subroutine test_grad12_j12() PROVIDE grad12_j12 - eps_ij = 1d-3 + eps_ij = 1d-6 acc_tot = 0.d0 normalz = 0.d0 diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index ac077fe0..7dd13f14 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -668,7 +668,7 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) double precision, intent(in) :: r1(3), r2(3) double precision, intent(out) :: grad(3) - double precision :: tmp0, tmp1, tmp2, tmp3, tmp4, grad_u12(3) + double precision :: tmp0, tmp1, tmp2, grad_u12(3) double precision, external :: j12_mu double precision, external :: j1b_nucl @@ -681,18 +681,93 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) tmp0 = j1b_nucl(r1) tmp1 = j1b_nucl(r2) tmp2 = j12_mu(r1, r2) - tmp3 = tmp0 * tmp1 - tmp4 = tmp2 * tmp1 - grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl_num(r1) - grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl_num(r1) - grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl_num(r1) + grad(1) = (tmp0 * grad_u12(1) + tmp2 * grad_x_j1b_nucl_num(r1)) * tmp1 + grad(2) = (tmp0 * grad_u12(2) + tmp2 * grad_y_j1b_nucl_num(r1)) * tmp1 + grad(3) = (tmp0 * grad_u12(3) + tmp2 * grad_z_j1b_nucl_num(r1)) * tmp1 return end subroutine grad1_jmu_modif_num ! --- - +subroutine get_tchint_rsdft_jastrow(x, y, dj) + + implicit none + double precision, intent(in) :: x(3), y(3) + double precision, intent(out) :: dj(3) + integer :: at + double precision :: a, mu_tmp, inv_sq_pi_2 + double precision :: tmp_x, tmp_y, tmp_z, tmp + double precision :: dx2, dy2, pos(3), dxy, dxy2 + double precision :: v1b_x, v1b_y + double precision :: u2b, grad1_u2b(3), grad1_v1b(3) + + PROVIDE mu_erf + + inv_sq_pi_2 = 0.5d0 / dsqrt(dacos(-1.d0)) + + dj = 0.d0 + +! double precision, external :: j12_mu, j1b_nucl +! v1b_x = j1b_nucl(x) +! v1b_y = j1b_nucl(y) +! call grad1_j1b_nucl(x, grad1_v1b) +! u2b = j12_mu(x, y) +! call grad1_j12_mu(x, y, grad1_u2b) + + ! 1b terms + v1b_x = 1.d0 + v1b_y = 1.d0 + tmp_x = 0.d0 + tmp_y = 0.d0 + tmp_z = 0.d0 + do at = 1, nucl_num + + a = j1b_pen(at) + pos(1) = nucl_coord(at,1) + pos(2) = nucl_coord(at,2) + pos(3) = nucl_coord(at,3) + + dx2 = sum((x-pos)**2) + dy2 = sum((y-pos)**2) + tmp = dexp(-a*dx2) * a + + v1b_x = v1b_x - dexp(-a*dx2) + v1b_y = v1b_y - dexp(-a*dy2) + + tmp_x = tmp_x + tmp * (x(1) - pos(1)) + tmp_y = tmp_y + tmp * (x(2) - pos(2)) + tmp_z = tmp_z + tmp * (x(3) - pos(3)) + end do + grad1_v1b(1) = 2.d0 * tmp_x + grad1_v1b(2) = 2.d0 * tmp_y + grad1_v1b(3) = 2.d0 * tmp_z + + ! 2b terms + dxy2 = sum((x-y)**2) + dxy = dsqrt(dxy2) + mu_tmp = mu_erf * dxy + u2b = 0.5d0 * dxy * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + + if(dxy .lt. 1d-8) then + grad1_u2b(1) = 0.d0 + grad1_u2b(2) = 0.d0 + grad1_u2b(3) = 0.d0 + else + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / dxy + grad1_u2b(1) = tmp * (x(1) - y(1)) + grad1_u2b(2) = tmp * (x(2) - y(2)) + grad1_u2b(3) = tmp * (x(3) - y(3)) + endif + + dj(1) = (grad1_u2b(1) * v1b_x + u2b * grad1_v1b(1)) * v1b_y + dj(2) = (grad1_u2b(2) * v1b_x + u2b * grad1_v1b(2)) * v1b_y + dj(3) = (grad1_u2b(3) * v1b_x + u2b * grad1_v1b(3)) * v1b_y + + return +end subroutine get_tchint_rsdft_jastrow + +! --- diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index bd7ff6b7..859f2aa5 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -70,6 +70,8 @@ elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + PROVIDE final_grid_points + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 55df20a2..0a7e08d2 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -8,6 +8,8 @@ program tc_bi_ortho my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 + !my_n_pt_r_grid = 100 + !my_n_pt_a_grid = 170 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call ERI_dump() @@ -21,26 +23,66 @@ end subroutine KMat_tilde_dump() implicit none - integer :: i, j, k, l + integer :: i, j, k, l + integer :: isym, ms2, st, iii + character(16) :: corb + double precision :: t1, t2 + integer, allocatable :: orbsym(:) + + print *, ' generating FCIDUMP' + call wall_time(t1) PROVIDE mo_bi_ortho_tc_two_e_chemist + PROVIDE mo_bi_ortho_tc_one_e - print *, ' Kmat_tilde in chem notation' + isym = 1 + ms2 = elec_alpha_num - elec_beta_num + st = 0 + iii = 0 + + allocate(orbsym(mo_num)) + orbsym(1:mo_num) = 1 + + open(33, file='FCIDUMP', action='write') + + write(33,'("&",a)') 'FCI' + write(33,'(1x,a,"=",i0,",")') 'NORB', mo_num + write(33,'(1x,a,"=",i0,",")') 'NELEC', elec_num + write(33,'(1x,a,"=",i0,",")') 'MS2', ms2 + write(33,'(1x,a,"=",i0,",")') 'ISYM', isym + write(corb,'(i0)') mo_num + write(33,'(1x,a,"=",'//corb//'(i0,","))') 'ORBSYM', orbsym + write(33,'(1x,a,"=",i0,",")') 'ST', st + write(33,'(1x,a,"=",i0,",")') 'III', iii + write(33,'(1x,a,"=",i0,",")') 'OCC', (elec_num-ms2)/2+ms2 + write(33,'(1x,a,"=",i0,",")') 'CLOSED', 2*elec_alpha_num + write(33,'(1x,"/")') - open(33, file='Kmat_tilde.dat', action='write') do l = 1, mo_num do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, mo_bi_ortho_tc_two_e_chemist(i,j,k,l) ! TCHint convention - !write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, mo_bi_ortho_tc_two_e_chemist(j,i,l,k) + write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo enddo + + do j = 1, mo_num + do i = 1, mo_num + ! TCHint convention + write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + enddo + enddo + close(33) + deallocate(orbsym) + + call wall_time(t2) + print *, ' end after (min)', (t2-t1)/60.d0 + return end subroutine KMat_tilde_dump @@ -106,12 +148,15 @@ subroutine LMat_tilde_dump() implicit none integer :: i, j, k, l, m, n double precision :: integral + double precision :: t1, t2 - PROVIDE mo_bi_ortho_tc_two_e_chemist + print *, ' generating TCDUMP' + call wall_time(t1) - print *, ' Lmat_tilde in phys notation' + PROVIDE mo_l_coef mo_r_coef - open(33, file='Lmat_tilde.dat', action='write') + open(33, file='TCDUMP', action='write') + write(33, '(4X, I4)') mo_num do n = 1, mo_num do m = 1, mo_num do l = 1, mo_num @@ -120,7 +165,12 @@ subroutine LMat_tilde_dump() do i = 1, mo_num ! < i j k | -L | l m n > with a BI-ORTHONORMAL MOLECULAR ORBITALS call give_integrals_3_body_bi_ort(i, j, k, l, m, n, integral) - write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral + !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral + ! TCHint convention + if(dabs(integral).gt.1d-10) then + write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + endif enddo enddo enddo @@ -129,6 +179,9 @@ subroutine LMat_tilde_dump() enddo close(33) + call wall_time(t2) + print *, ' end after (min)', (t2-t1)/60.d0 + return end subroutine LMat_tilde_dump diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index b9f23a8a..2f667a48 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -8,11 +8,12 @@ program print_tc_energy print *, 'Hello world' my_grid_becke = .True. - !my_n_pt_r_grid = 30 - !my_n_pt_a_grid = 50 - my_n_pt_r_grid = 100 - my_n_pt_a_grid = 170 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + + !my_n_pt_r_grid = 100 + !my_n_pt_a_grid = 170 !my_n_pt_r_grid = 100 !my_n_pt_a_grid = 266 diff --git a/src/tc_scf/print_tcscf_energy.irp.f b/src/tc_scf/print_tcscf_energy.irp.f new file mode 100644 index 00000000..96512cb7 --- /dev/null +++ b/src/tc_scf/print_tcscf_energy.irp.f @@ -0,0 +1,58 @@ +program print_tcscf_energy + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + print *, 'Hello world' + my_grid_becke = .True. + + !my_n_pt_r_grid = 30 + !my_n_pt_a_grid = 50 + + my_n_pt_r_grid = 100 + my_n_pt_a_grid = 170 + + !my_n_pt_r_grid = 100 + !my_n_pt_a_grid = 266 + + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call main() + +end + +! --- + +subroutine main() + + implicit none + double precision :: etc_tot, etc_1e, etc_2e, etc_3e + + PROVIDE mu_erf + PROVIDE j1b_type + + print*, ' mu_erf = ', mu_erf + print*, ' j1b_type = ', j1b_type + + etc_tot = TC_HF_energy + etc_1e = TC_HF_one_e_energy + etc_2e = TC_HF_two_e_energy + etc_3e = 0.d0 + if(three_body_h_tc) then + !etc_3e = diag_three_elem_hf + etc_3e = tcscf_energy_3e_naive + endif + + print *, " E_TC = ", etc_tot + print *, " E_1e = ", etc_1e + print *, " E_2e = ", etc_2e + print *, " E_3e = ", etc_3e + + return +end subroutine main + +! --- + diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 04c4f92d..2f2d803f 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -13,8 +13,13 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + + !my_n_pt_r_grid = 30 + !my_n_pt_a_grid = 50 + + my_n_pt_r_grid = 100 + my_n_pt_a_grid = 170 + ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid diff --git a/src/tc_scf/tcscf_energy_naive.irp.f b/src/tc_scf/tcscf_energy_naive.irp.f new file mode 100644 index 00000000..82bb8799 --- /dev/null +++ b/src/tc_scf/tcscf_energy_naive.irp.f @@ -0,0 +1,80 @@ + +! --- + +BEGIN_PROVIDER [double precision, tcscf_energy_3e_naive] + + implicit none + integer :: i, j, k + integer :: neu, ned, D(elec_num) + integer :: ii, jj, kk + integer :: si, sj, sk + double precision :: I_ijk, I_jki, I_kij, I_jik, I_ikj, I_kji + double precision :: I_tot + + PROVIDE mo_l_coef mo_r_coef + + neu = elec_alpha_num + ned = elec_beta_num + if (neu > 0) D(1:neu) = [(2*i-1, i = 1, neu)] + if (ned > 0) D(neu+1:neu+ned) = [(2*i, i = 1, ned)] + + !print*, "D = " + !do i = 1, elec_num + ! ii = (D(i) - 1) / 2 + 1 + ! si = mod(D(i), 2) + ! print*, i, D(i), ii, si + !enddo + + tcscf_energy_3e_naive = 0.d0 + + do i = 1, elec_num - 2 + ii = (D(i) - 1) / 2 + 1 + si = mod(D(i), 2) + + do j = i + 1, elec_num - 1 + jj = (D(j) - 1) / 2 + 1 + sj = mod(D(j), 2) + + do k = j + 1, elec_num + kk = (D(k) - 1) / 2 + 1 + sk = mod(D(k), 2) + + call give_integrals_3_body_bi_ort(ii, jj, kk, ii, jj, kk, I_ijk) + I_tot = I_ijk + + if(sj==si .and. sk==sj) then + call give_integrals_3_body_bi_ort(ii, jj, kk, jj, kk, ii, I_jki) + I_tot += I_jki + endif + + if(sk==si .and. si==sj) then + call give_integrals_3_body_bi_ort(ii, jj, kk, kk, ii, jj, I_kij) + I_tot += I_kij + endif + + if(sj==si) then + call give_integrals_3_body_bi_ort(ii, jj, kk, jj, ii, kk, I_jik) + I_tot -= I_jik + endif + + if(sk==sj) then + call give_integrals_3_body_bi_ort(ii, jj, kk, ii, kk, jj, I_ikj) + I_tot -= I_ikj + endif + + if(sk==si) then + call give_integrals_3_body_bi_ort(ii, jj, kk, kk, jj, ii, I_kji) + I_tot -= I_kji + endif + + tcscf_energy_3e_naive += I_tot + enddo + enddo + enddo + + tcscf_energy_3e_naive = -tcscf_energy_3e_naive + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/three_e_energy_bi_ortho.irp.f b/src/tc_scf/three_e_energy_bi_ortho.irp.f index 64212da8..0c9ebbd7 100644 --- a/src/tc_scf/three_e_energy_bi_ortho.irp.f +++ b/src/tc_scf/three_e_energy_bi_ortho.irp.f @@ -1,24 +1,32 @@ -subroutine contrib_3e_diag_sss(i,j,k,integral) - implicit none - integer, intent(in) :: i,j,k - BEGIN_DOC - ! returns the pure same spin contribution to diagonal matrix element of 3e term - END_DOC - double precision, intent(out) :: integral - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > - call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > - call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > - integral = direct_int + c_3_int + c_minus_3_int - ! negative terms :: exchange contrib - call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 - call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 - call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 - integral += - exch_13_int - exch_23_int - exch_12_int - integral = -integral +subroutine contrib_3e_diag_sss(i, j, k, integral) + + BEGIN_DOC + ! returns the pure same spin contribution to diagonal matrix element of 3e term + END_DOC + + implicit none + integer, intent(in) :: i, j, k + double precision, intent(out) :: integral + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + + call give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j > + call give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int) ! < i k j | j i k > + call give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i > + integral = direct_int + c_3_int + c_minus_3_int + + ! negative terms :: exchange contrib + call give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 + call give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23 + call give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12 + + integral += - exch_13_int - exch_23_int - exch_12_int + integral = -integral + end +! --- + subroutine contrib_3e_diag_soo(i,j,k,integral) implicit none integer, intent(in) :: i,j,k @@ -51,23 +59,30 @@ subroutine give_aaa_contrib_bis(integral_aaa) end +! --- + subroutine give_aaa_contrib(integral_aaa) - implicit none - double precision, intent(out) :: integral_aaa - double precision :: integral - integer :: i,j,k - integral_aaa = 0.d0 - do i = 1, elec_alpha_num - do j = 1, elec_alpha_num - do k = 1, elec_alpha_num - call contrib_3e_diag_sss(i,j,k,integral) - integral_aaa += integral - enddo + + implicit none + integer :: i, j, k + double precision :: integral + double precision, intent(out) :: integral_aaa + + integral_aaa = 0.d0 + do i = 1, elec_alpha_num + do j = 1, elec_alpha_num + do k = 1, elec_alpha_num + call contrib_3e_diag_sss(i, j, k, integral) + integral_aaa += integral + enddo + enddo enddo - enddo - integral_aaa *= 1.d0/6.d0 + integral_aaa *= 1.d0/6.d0 + + return end +! --- subroutine give_aab_contrib(integral_aab) implicit none From d4b0312414ecf2fc38a672ef62c09e6b44bd6047 Mon Sep 17 00:00:00 2001 From: Emmanuel Giner LCT Date: Thu, 29 Jun 2023 18:31:48 +0200 Subject: [PATCH 194/337] removed UGLY NON ASCII CHARACTERS --- src/mo_optimization/debug_gradient_list_opt.irp.f | 4 ++-- src/mo_optimization/debug_gradient_opt.irp.f | 4 ++-- src/mo_optimization/debug_hessian_list_opt.irp.f | 2 +- src/mo_optimization/debug_hessian_opt.irp.f | 2 +- src/mol_properties/multi_s_dipole_moment.irp.f | 2 +- src/mol_properties/print_properties.irp.f | 12 ++++++------ src/tc_bi_ortho/dressing_vectors_lr.irp.f | 8 ++++---- src/tc_bi_ortho/h_biortho.irp.f | 6 +++--- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 4 ++-- src/tc_bi_ortho/slater_tc_opt.irp.f | 6 +++--- src/tc_bi_ortho/slater_tc_opt_double.irp.f | 4 ++-- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 4 ++-- src/tc_bi_ortho/slater_tc_slow.irp.f | 8 ++++---- src/tc_bi_ortho/tc_hmat.irp.f | 2 +- src/utils_trust_region/algo_trust.irp.f | 2 +- 15 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f index 867e0105..32cea90c 100644 --- a/src/mo_optimization/debug_gradient_list_opt.irp.f +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -35,14 +35,14 @@ program debug_gradient_list ! Definition of n n = m*(m-1)/2 - PROVIDE mo_two_e_integrals_in_map ! Vérifier pour suppression + PROVIDE mo_two_e_integrals_in_map ! Verifier pour suppression ! Allocation allocate(v_grad(n), v_grad2(n)) ! Calculation - call diagonalize_ci ! Vérifier pour suppression + call diagonalize_ci ! Verifier pour suppression ! Gradient call gradient_list_opt(n,m,list_act,v_grad,max_elem,norm) diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f index 8aeec18f..529a02b6 100644 --- a/src/mo_optimization/debug_gradient_opt.irp.f +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -34,14 +34,14 @@ program debug_gradient ! Definition of n n = mo_num*(mo_num-1)/2 - PROVIDE mo_two_e_integrals_in_map ! Vérifier pour suppression + PROVIDE mo_two_e_integrals_in_map ! Check for suppression ! Allocation allocate(v_grad(n), v_grad2(n)) ! Calculation - call diagonalize_ci ! Vérifier pour suppression + call diagonalize_ci ! Gradient call first_gradient_opt(n,v_grad) diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f index d1aa79c4..65a7bcf3 100644 --- a/src/mo_optimization/debug_hessian_list_opt.irp.f +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -49,7 +49,7 @@ program debug_hessian_list_opt ! Definition of n n = m*(m-1)/2 - PROVIDE mo_two_e_integrals_in_map ! Vérifier pour suppression + PROVIDE mo_two_e_integrals_in_map ! Hessian if (optimization_method == 'full') then diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f index 6d22cc01..684a0da5 100644 --- a/src/mo_optimization/debug_hessian_opt.irp.f +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -40,7 +40,7 @@ program debug_hessian ! Definition of n n = mo_num*(mo_num-1)/2 - PROVIDE mo_two_e_integrals_in_map ! Vérifier pour suppression + PROVIDE mo_two_e_integrals_in_map ! Allocation allocate(H(n,n),H2(n,n)) diff --git a/src/mol_properties/multi_s_dipole_moment.irp.f b/src/mol_properties/multi_s_dipole_moment.irp.f index d5e62799..913ae2f3 100644 --- a/src/mol_properties/multi_s_dipole_moment.irp.f +++ b/src/mol_properties/multi_s_dipole_moment.irp.f @@ -13,7 +13,7 @@ ! matrix as a expectation value ! \begin{align*} ! <\Psi_n|x| \Psi_m > = \sum_p \gamma_{pp}^{nm} < \phi_p | x | \phi_p > -! + \sum_{pq, p \neq q} \gamma_{pq}^{nm} < \phi_p | x | \phi_q > + < \Psi_m | \sum_A Z_A R_A | \Psi_n > +! + \sum_{pq, p \neq q} \gamma_{pq}^{nm} < \phi_p |x | \phi_q > + < \Psi_m | \sum_A Z_A R_A | \Psi_n > ! \end{align*} diff --git a/src/mol_properties/print_properties.irp.f b/src/mol_properties/print_properties.irp.f index 4c0a9f38..af413a88 100644 --- a/src/mol_properties/print_properties.irp.f +++ b/src/mol_properties/print_properties.irp.f @@ -13,7 +13,7 @@ subroutine print_dipole_moment implicit none BEGIN_DOC - ! To print the dipole moment ||<\Psi_i|µ|\Psi_i>|| and its x,y,z components + ! To print the dipole moment ||<\Psi_i|\mu|\Psi_i>|| and its x,y,z components END_DOC integer :: istate @@ -33,7 +33,7 @@ subroutine print_dipole_moment print*,'# Dipoles:' print*,'==============================================' print*,' Dipole moments (au)' - print*,' State X Y Z ||µ||' + print*,' State X Y Z ||MU||' do istate = 1, N_states write(*,'(I5,4(F12.6))') (istate-1), d_x(istate), d_y(istate), d_z(istate), d(istate) @@ -42,7 +42,7 @@ subroutine print_dipole_moment ! Debye print*,'' print*,' Dipole moments (D)' - print*,' State X Y Z ||µ||' + print*,' State X Y Z ||MU||' do istate = 1, N_states write(*,'(I5,4(F12.6))') (istate-1), d_x(istate)*au_to_D, d_y(istate)*au_to_D, d_z(istate)*au_to_D, d(istate)*au_to_D @@ -70,7 +70,7 @@ subroutine print_transition_dipole_moment implicit none BEGIN_DOC - ! To print the transition dipole moment ||<\Psi_i|µ|\Psi_j>|| and its components along x, y and z + ! To print the transition dipole moment ||<\Psi_i|\mu|\Psi_j>|| and its components along x, y and z END_DOC integer :: istate,jstate, n_states_print @@ -84,7 +84,7 @@ subroutine print_transition_dipole_moment print*,'# Transition dipoles:' print*,'==============================================' print*,' Transition dipole moments (au)' - write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + write(*,'(A89)') ' # Transition X Y Z ||MU|| Dip. str. Osc. str.' if (print_all_transitions) then n_states_print = N_states @@ -106,7 +106,7 @@ subroutine print_transition_dipole_moment print*,'' print*,' Transition dipole moments (D)' - write(*,'(A89)') ' # Transition X Y Z ||µ|| Dip. str. Osc. str.' + write(*,'(A89)') ' # Transition X Y Z ||MU|| Dip. str. Osc. str.' do jstate = 1, n_states_print !N_states do istate = jstate + 1, N_states diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index ed663f02..0aff9980 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -38,9 +38,9 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do i = 1, ndet do j = 1, ndet - ! < I | Htilde | J > + ! < I |Htilde | J > call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) - ! < I | H | J > + ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta_mat = htc_tot - h_tot @@ -87,7 +87,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do i = 1, ndet do j = 1, ndet - ! < I | Htilde | J > + ! < I |Htilde | J > call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot @@ -141,7 +141,7 @@ subroutine get_h_bitc_right(psidet, psicoef, ndet, Nint, delta) do i = 1, ndet do j = 1, ndet - ! < I | H | J > + ! < I |H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta(i) = delta(i) + psicoef(j) * h_tot diff --git a/src/tc_bi_ortho/h_biortho.irp.f b/src/tc_bi_ortho/h_biortho.irp.f index 492e1282..bc06b88d 100644 --- a/src/tc_bi_ortho/h_biortho.irp.f +++ b/src/tc_bi_ortho/h_biortho.irp.f @@ -5,7 +5,7 @@ subroutine hmat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, htot) BEGIN_DOC ! - ! < key_j | H | key_i > where | key_j > is developed on the LEFT basis and | key_i > is developed on the RIGHT basis + ! < key_j |H | key_i > where | key_j > is developed on the LEFT basis and | key_i > is developed on the RIGHT basis ! END_DOC @@ -111,7 +111,7 @@ subroutine single_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe) BEGIN_DOC ! - ! < key_j | H | key_i > for single excitation + ! < key_j |H | key_i > for single excitation ! END_DOC @@ -185,7 +185,7 @@ subroutine double_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe) BEGIN_DOC ! - ! < key_j | H | key_i> for double excitation + ! < key_j |H | key_i> for double excitation ! END_DOC diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 49977f37..76539cb3 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -93,7 +93,7 @@ end subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC - ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! ! @@ -188,7 +188,7 @@ end subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC - ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS + ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS !! !! WARNING !! ! diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index ceefbfb8..933479e9 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -36,7 +36,7 @@ subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC ! - ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! ! Returns the total matrix element !! WARNING !! @@ -55,7 +55,7 @@ end subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! - ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! ! Returns the detail of the matrix element in terms of single, two and three electron contribution. !! WARNING !! @@ -100,7 +100,7 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) BEGIN_DOC ! - ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS !! WARNING !! diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index 12bbbec0..bd59583f 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -2,7 +2,7 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! @@ -430,7 +430,7 @@ end subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 9719a6e7..ddcd1e66 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -2,7 +2,7 @@ subroutine single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC - ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! @@ -464,7 +464,7 @@ END_PROVIDER subroutine single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot) BEGIN_DOC - ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! diff --git a/src/tc_bi_ortho/slater_tc_slow.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f index 1833d20f..0e0b5812 100644 --- a/src/tc_bi_ortho/slater_tc_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -4,7 +4,7 @@ subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC - ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! !! WARNING !! ! @@ -35,7 +35,7 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, BEGIN_DOC ! - ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis + ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! ! Returns the detail of the matrix element in terms of single, two and three electron contribution. !! WARNING !! @@ -191,7 +191,7 @@ end subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC - ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for double excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! @@ -258,7 +258,7 @@ end subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC - ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS + ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index ec072531..e2c6f010 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -21,7 +21,7 @@ !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, N_det do j = 1, N_det - ! < J | Htilde | I > + ! < J |Htilde | I > call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) htilde_matrix_elmt_bi_ortho(j,i) = htot diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f index 933d8eff..09d76a40 100644 --- a/src/utils_trust_region/algo_trust.irp.f +++ b/src/utils_trust_region/algo_trust.irp.f @@ -77,7 +77,7 @@ ! ! Criterion -> step accepted or rejected ! call trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, criterion_model,rho,cancel_step) ! -! ! ### TODO ### +! !### TODO ### ! !if (cancel_step) then ! ! Cancel the previous step (mo_coef = prev_mos if you keep them...) ! !endif From e83a1f962ebd60d8be004e3c555ae195f70404f9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 29 Jun 2023 18:52:31 +0200 Subject: [PATCH 195/337] Cholesky flag in CCSD --- src/utils_cc/mo_integrals_cc.irp.f | 139 ++++++++++++++++++++--------- 1 file changed, 96 insertions(+), 43 deletions(-) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 485d7002..dafcf7af 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -47,33 +47,61 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) - !$OMP DO - do i4 = 1, n4 - idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 - idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) - do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + if (do_ao_cholesky) then + double precision, allocatable :: buffer(:,:,:) + !$OMP PARALLEL & + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& + !$OMP DEFAULT(NONE) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, n4 + idx4 = list4(i4) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, n2 + idx2 = list2(i2) + do i3 = 1, n3 + idx3 = list3(i3) + do i1 = 1, n1 + idx1 = list1(i1) + v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + enddo enddo enddo enddo - enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + else + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_integrals_map) & + !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4)& + !$OMP DEFAULT(NONE) + !$OMP DO collapse(3) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + idx4 = list4(i4) + idx3 = list3(i3) + idx2 = list2(i2) + idx1 = list1(i1) + v(i1,i2,i3,i4) = get_two_e_integral(idx1,idx2,idx3,idx4,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif end @@ -81,29 +109,54 @@ end BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] implicit none - integer :: i1,i2,i3,i4,k - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) - !$OMP DO - do i4 = 1, mo_num - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, mo_num - do i3 = 1, mo_num - do i1 = 1, mo_num - cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) + if (do_ao_cholesky) then + integer :: i1,i2,i3,i4 + double precision, allocatable :: buffer(:,:,:) + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& + !$OMP DEFAULT(NONE) + allocate(buffer(mo_num,mo_num,mo_num)) + !$OMP DO + do i4 = 1, mo_num + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + do i2 = 1, mo_num + do i3 = 1, mo_num + do i1 = 1, mo_num + cc_space_v(i1,i2,i3,i4) = buffer(i1,i3,i2) + enddo enddo enddo enddo - enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END DO + deallocate(buffer) + !$OMP END PARALLEL + else + integer :: i,j,k,l + double precision :: get_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL & + !$OMP SHARED(cc_space_v,mo_num,mo_integrals_map) & + !$OMP PRIVATE(i,j,k,l) & + !$OMP DEFAULT(NONE) + + !$OMP DO collapse(3) + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + cc_space_v(i,j,k,l) = get_two_e_integral(i,j,k,l,mo_integrals_map) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + endif END_PROVIDER From 727c70c0fa6a000489fed5f6ce93ea48c5bbabb7 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 2 Jul 2023 00:19:17 +0200 Subject: [PATCH 196/337] \int dr2 phi_i(r2) phi_j(r2) u(r12) v_1b(r2) --- src/ao_many_one_e_ints/ao_erf_gauss.irp.f | 142 ++++++++++++++++- .../grad_lapl_jmu_modif.irp.f | 144 +++++++++++++++++- src/ao_tc_eff_map/fit_j.irp.f | 29 ++-- src/non_h_ints_mu/tc_integ.irp.f | 8 +- src/tc_bi_ortho/print_tc_energy.irp.f | 8 +- src/utils/integration.irp.f | 2 +- 6 files changed, 306 insertions(+), 27 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index 3d7fbe50..b1077161 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -212,9 +212,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) ! Computes the following integral : ! ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. - ! ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. - ! ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. ! END_DOC @@ -279,9 +277,7 @@ subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ ! Computes the following integral : ! ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. - ! ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. - ! ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. ! END_DOC @@ -1111,3 +1107,141 @@ end ! --- +subroutine NAI_pol_x2_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + double precision, intent(out) :: ints(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + double precision :: integral0, integral1, integral2 + + double precision, external :: NAI_pol_mult_erf_with1s + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + call NAI_pol_x2_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + return + endif + + ints = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + + power_A1 = power_Ai + power_A1(m) += 1 + + power_A2 = power_Ai + power_A2(m) += 2 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + integral0 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + integral1 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A1, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + integral2 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A2, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + + ints(m) += coef * (integral2 + Ai_center(m) * (2.d0*integral1 + Ai_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_x2_mult_erf_ao_with1s + +! --- + +subroutine NAI_pol_x2_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + double precision, intent(out) :: ints(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision :: integral0, integral1, integral2 + + double precision :: NAI_pol_mult_erf + + ints = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + + power_A1 = power_A + power_A1(m) += 1 + + power_A2 = power_A + power_A2(m) += 2 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + integral0 = NAI_pol_mult_erf(A_center, B_center, power_A , power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral1 = NAI_pol_mult_erf(A_center, B_center, power_A1, power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral2 = NAI_pol_mult_erf(A_center, B_center, power_A2, power_B, alpha, beta, C_center, n_pt_in, mu_in) + + ints(m) += coef * (integral2 + A_center(m) * (2.d0*integral1 + A_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_x2_mult_erf_ao + +! --- + diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 25bb2f8b..9d34e1d7 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -195,7 +195,6 @@ END_PROVIDER ! --- -! TODO analytically BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -217,6 +216,8 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ call wall_time(wall0) provide mu_erf final_grid_points j1b_pen + PROVIDE ng_fit_jast expo_gauss_j_mu_x coef_gauss_j_mu_x + PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent v_ij_u_cst_mu_j1b = 0.d0 @@ -229,7 +230,6 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) !$OMP DO - !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -240,10 +240,13 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ tmp = 0.d0 do i_fit = 1, ng_fit_jast - expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) +! do i_fit = ng_fit_jast, ng_fit_jast +! expo_fit = 5.0d0 +! coef_fit = 1.0d0 + ! --- coef = List_all_comb_b2_coef (1) @@ -253,7 +256,6 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ B_center(3) = List_all_comb_b2_cent(3,1) int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) -! if(dabs(int_fit*coef) .lt. 1d-12) cycle tmp += coef * coef_fit * int_fit @@ -298,3 +300,137 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer :: i, j, ipoint, i_1s + double precision :: r(3), r1_2 + double precision :: int_c1, int_e1, int_o + double precision :: int_c2(3), int_e2(3) + double precision :: int_c3(3), int_e3(3) + double precision :: coef, beta, B_center(3) + double precision :: tmp, ct + double precision :: wall0, wall1 + + double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: NAI_pol_mult_erf_ao_with1s + + print*, ' providing v_ij_u_cst_mu_j1b_an ...' + call wall_time(wall0) + + provide mu_erf final_grid_points j1b_pen + PROVIDE ng_fit_jast expo_gauss_j_mu_x coef_gauss_j_mu_x + PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent + + ct = inv_sq_pi_2 / mu_erf + + v_ij_u_cst_mu_j1b_an = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, & + !$OMP r1_2, tmp, int_c1, int_e1, int_o, int_c2, & + !$OMP int_e2, int_c3, int_e3) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & + !$OMP final_grid_points, mu_erf, ct, & + !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) + !$OMP DO + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + r1_2 = 0.5d0 * (r(1)*r(1) + r(2)*r(2) + r(3)*r(3)) + + do i = 1, ao_num + do j = i, ao_num + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + int_c1 = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) + int_e1 = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) + + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c2) + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e2) + + call NAI_pol_x2_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c3) + call NAI_pol_x2_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e3) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = coef & + * ( r1_2 * (int_c1 - int_e1) & + - r(1) * (int_c2(1) - int_e2(1)) - r(2) * (int_c2(2) - int_e2(2)) - r(3) * (int_c2(3) - int_e2(3)) & + + 0.5d0 * (int_c3(1) + int_c3(2) + int_c3(3) - int_e3(1) - int_e3(2) - int_e3(3)) & + - ct * int_o & + ) + + ! --- + + do i_1s = 2, List_all_comb_b2_size + + coef = List_all_comb_b2_coef (i_1s) + beta = List_all_comb_b2_expo (i_1s) + B_center(1) = List_all_comb_b2_cent(1,i_1s) + B_center(2) = List_all_comb_b2_cent(2,i_1s) + B_center(3) = List_all_comb_b2_cent(3,i_1s) + + int_c1 = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) + int_e1 = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) + + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c2) + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e2) + + call NAI_pol_x2_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c3) + call NAI_pol_x2_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e3) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = tmp + coef & + * ( r1_2 * (int_c1 - int_e1) & + - r(1) * (int_c2(1) - int_e2(1)) - r(2) * (int_c2(2) - int_e2(2)) - r(3) * (int_c2(3) - int_e2(3)) & + + 0.5d0 * (int_c3(1) + int_c3(2) + int_c3(3) - int_e3(1) - int_e3(2) - int_e3(3)) & + - ct * int_o & + ) + + enddo + + ! --- + + v_ij_u_cst_mu_j1b_an(j,i,ipoint) = tmp + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + v_ij_u_cst_mu_j1b_an(j,i,ipoint) = v_ij_u_cst_mu_j1b_an(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for v_ij_u_cst_mu_j1b_an', wall1 - wall0 + +END_PROVIDER + +! --- + diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f index 4730d003..0fc3da2f 100644 --- a/src/ao_tc_eff_map/fit_j.irp.f +++ b/src/ao_tc_eff_map/fit_j.irp.f @@ -36,16 +36,25 @@ END_PROVIDER END_PROVIDER BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ] - implicit none - BEGIN_DOC - ! F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) is fitted with a gaussian and a Slater - ! - ! \approx - 1/sqrt(pi) * exp(-alpha * x ) exp(-beta * x**2) - ! - ! where alpha = expo_j_xmu(1) and beta = expo_j_xmu(2) - END_DOC - expo_j_xmu(1) = 1.7477d0 - expo_j_xmu(2) = 0.668662d0 + + BEGIN_DOC + ! F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) is fitted with a gaussian and a Slater + ! + ! \approx - 1/sqrt(pi) * exp(-alpha * x ) exp(-beta * x**2) + ! + ! where alpha = expo_j_xmu(1) and beta = expo_j_xmu(2) + END_DOC + + implicit none + + !expo_j_xmu(1) = 1.7477d0 + !expo_j_xmu(2) = 0.668662d0 + + !expo_j_xmu(1) = 1.74766377595541d0 + !expo_j_xmu(2) = 0.668719925486403d0 + + expo_j_xmu(1) = 1.74770446934522d0 + expo_j_xmu(2) = 0.668659706559979d0 END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index b2c0df31..ce65b203 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -70,14 +70,14 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b int2_grad1_u12_ao = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & - !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) + !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b_an, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) @@ -90,7 +90,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f do j = 1, ao_num do i = 1, ao_num tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b_an(i,j,ipoint) int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z @@ -100,7 +100,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL - FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b elseif(j1b_type .ge. 100) then diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index 2f667a48..522f4cd7 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -9,11 +9,11 @@ program print_tc_energy print *, 'Hello world' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + !my_n_pt_r_grid = 30 + !my_n_pt_a_grid = 50 - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 170 + my_n_pt_r_grid = 100 + my_n_pt_a_grid = 170 !my_n_pt_r_grid = 100 !my_n_pt_a_grid = 266 diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index b548b18a..72029c73 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -418,7 +418,7 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) xab = xa-xb ab = ab*p_inv k = ab*xab*xab - if (k > 40.d0) then + if (k > 400.d0) then k=0.d0 return endif From 87b05b798b9f1154d6395dca46f72dfe5c6d8767 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 2 Jul 2023 15:29:21 +0200 Subject: [PATCH 197/337] clean in PROVIDERS --- src/bi_ort_ints/semi_num_ints_mo.irp.f | 4 +- src/bi_ort_ints/three_body_ijm.irp.f | 10 ++ src/fci_tc_bi/diagonalize_ci.irp.f | 137 ++++++++++++---------- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 22 ++-- src/non_h_ints_mu/tc_integ.irp.f | 3 +- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 128 ++++++++++---------- src/tc_bi_ortho/slater_tc_opt.irp.f | 5 +- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 106 ++++++++++------- src/tc_bi_ortho/slater_tc_slow.irp.f | 11 +- src/tc_bi_ortho/symmetrized_3_e_int.irp.f | 46 ++++++-- 10 files changed, 277 insertions(+), 195 deletions(-) diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 355fa38f..51f0cba4 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -140,8 +140,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, enddo enddo - FREE int2_grad1_u12_ao - endif call wall_time(wall1) @@ -225,6 +223,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, implicit none integer :: i, j, ipoint + PROVIDE int2_grad1_u12_ao + do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = 1, ao_num diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index ae100fb5..5de33a76 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, integer :: i, j, m double precision :: integral, wall1, wall0 + PROVIDE mo_l_coef mo_r_coef + three_e_3_idx_direct_bi_ort = 0.d0 print *, ' Providing the three_e_3_idx_direct_bi_ort ...' call wall_time(wall0) @@ -125,6 +127,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num integer :: i, j, m double precision :: integral, wall1, wall0 + PROVIDE mo_l_coef mo_r_coef + three_e_3_idx_cycle_2_bi_ort = 0.d0 print *, ' Providing the three_e_3_idx_cycle_2_bi_ort ...' call wall_time(wall0) @@ -179,6 +183,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, integer :: i, j, m double precision :: integral, wall1, wall0 + PROVIDE mo_l_coef mo_r_coef + three_e_3_idx_exch23_bi_ort = 0.d0 print*,'Providing the three_e_3_idx_exch23_bi_ort ...' call wall_time(wall0) @@ -233,6 +239,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, integer :: i,j,m double precision :: integral, wall1, wall0 + PROVIDE mo_l_coef mo_r_coef + three_e_3_idx_exch13_bi_ort = 0.d0 print *, ' Providing the three_e_3_idx_exch13_bi_ort ...' call wall_time(wall0) @@ -287,6 +295,8 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, integer :: i, j, m double precision :: integral, wall1, wall0 + PROVIDE mo_l_coef mo_r_coef + three_e_3_idx_exch12_bi_ort = 0.d0 print *, ' Providing the three_e_3_idx_exch12_bi_ort ...' call wall_time(wall0) diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index df753449..8b59785a 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -1,21 +1,27 @@ -subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) +! --- + +subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc, norm, pt2_data, print_pt2) + + BEGIN_DOC + ! Replace the coefficients of the CI states by the coefficients of the + ! eigenstates of the CI matrix + END_DOC + use selection_types implicit none - integer, intent(inout) :: ndet ! number of determinants from before - double precision, intent(inout) :: E_tc,norm ! E and norm from previous wave function - type(pt2_type) , intent(in) :: pt2_data ! PT2 from previous wave function - logical, intent(in) :: print_pt2 - BEGIN_DOC -! Replace the coefficients of the CI states by the coefficients of the -! eigenstates of the CI matrix - END_DOC - integer :: i,j - double precision :: pt2_tmp,pt1_norm,rpt2_tmp,abs_pt2 - pt2_tmp = pt2_data % pt2(1) - abs_pt2 = pt2_data % variance(1) - pt1_norm = pt2_data % overlap(1,1) - rpt2_tmp = pt2_tmp/(1.d0 + pt1_norm) + integer, intent(inout) :: ndet ! number of determinants from before + double precision, intent(inout) :: E_tc, norm ! E and norm from previous wave function + type(pt2_type) , intent(in) :: pt2_data ! PT2 from previous wave function + logical, intent(in) :: print_pt2 + integer :: i, j + double precision :: pt2_tmp, pt1_norm, rpt2_tmp, abs_pt2 + + pt2_tmp = pt2_data % pt2(1) + abs_pt2 = pt2_data % variance(1) + pt1_norm = pt2_data % overlap(1,1) + rpt2_tmp = pt2_tmp/(1.d0 + pt1_norm) + print*,'*****' print*,'New wave function information' print*,'N_det tc = ',N_det @@ -23,53 +29,61 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1) print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1) print*,'*****' - if(print_pt2)then - print*,'*****' - print*,'previous wave function info' - print*,'norm(before) = ',norm - print*,'E(before) = ',E_tc - print*,'PT1 norm = ',dsqrt(pt1_norm) - print*,'PT2 = ',pt2_tmp - print*,'rPT2 = ',rpt2_tmp - print*,'|PT2| = ',abs_pt2 - print*,'Positive PT2 = ',(pt2_tmp + abs_pt2)*0.5d0 - print*,'Negative PT2 = ',(pt2_tmp - abs_pt2)*0.5d0 - print*,'E(before) + PT2 = ',E_tc + pt2_tmp/norm - print*,'E(before) +rPT2 = ',E_tc + rpt2_tmp/norm - write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 - print*,'*****' + + if(print_pt2) then + print*,'*****' + print*,'previous wave function info' + print*,'norm(before) = ',norm + print*,'E(before) = ',E_tc + print*,'PT1 norm = ',dsqrt(pt1_norm) + print*,'PT2 = ',pt2_tmp + print*,'rPT2 = ',rpt2_tmp + print*,'|PT2| = ',abs_pt2 + print*,'Positive PT2 = ',(pt2_tmp + abs_pt2)*0.5d0 + print*,'Negative PT2 = ',(pt2_tmp - abs_pt2)*0.5d0 + print*,'E(before) + PT2 = ',E_tc + pt2_tmp/norm + print*,'E(before) +rPT2 = ',E_tc + rpt2_tmp/norm + write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 + print*,'*****' endif + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) - nuclear_repulsion psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) - E_tc = eigval_right_tc_bi_orth(1) - norm = norm_ground_left_right_bi_orth - ndet = N_det - do j=1,N_states - do i=1,N_det + E_tc = eigval_right_tc_bi_orth(1) + norm = norm_ground_left_right_bi_orth + ndet = N_det + do j = 1, N_states + do i = 1, N_det psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) - psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j)) + psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j)) enddo enddo SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef psi_energy psi_s2 - call save_tc_bi_ortho_wavefunction + call save_tc_bi_ortho_wavefunction() + end -subroutine print_CI_dressed(ndet, E_tc,norm,pt2_data,print_pt2) +! --- + +subroutine print_CI_dressed(ndet, E_tc, norm, pt2_data, print_pt2) + + BEGIN_DOC + ! Replace the coefficients of the CI states by the coefficients of the + ! eigenstates of the CI matrix + END_DOC + use selection_types implicit none - integer, intent(inout) :: ndet ! number of determinants from before - double precision, intent(inout) :: E_tc,norm ! E and norm from previous wave function - type(pt2_type) , intent(in) :: pt2_data ! PT2 from previous wave function - logical, intent(in) :: print_pt2 - BEGIN_DOC -! Replace the coefficients of the CI states by the coefficients of the -! eigenstates of the CI matrix - END_DOC - integer :: i,j + integer, intent(inout) :: ndet ! number of determinants from before + double precision, intent(inout) :: E_tc,norm ! E and norm from previous wave function + type(pt2_type) , intent(in) :: pt2_data ! PT2 from previous wave function + logical, intent(in) :: print_pt2 + integer :: i, j + print*,'*****' print*,'New wave function information' print*,'N_det tc = ',N_det @@ -77,22 +91,25 @@ subroutine print_CI_dressed(ndet, E_tc,norm,pt2_data,print_pt2) print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1) print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1) print*,'*****' - if(print_pt2)then - print*,'*****' - print*,'previous wave function info' - print*,'norm(before) = ',norm - print*,'E(before) = ',E_tc - print*,'PT1 norm = ',dsqrt(pt2_data % overlap(1,1)) - print*,'E(before) + PT2 = ',E_tc + (pt2_data % pt2(1))/norm - print*,'PT2 = ',pt2_data % pt2(1) - print*,'Ndet, E_tc, E+PT2 = ',ndet,E_tc,E_tc + (pt2_data % pt2(1))/norm,dsqrt(pt2_data % overlap(1,1)) - print*,'*****' + + if(print_pt2) then + print*,'*****' + print*,'previous wave function info' + print*,'norm(before) = ',norm + print*,'E(before) = ',E_tc + print*,'PT1 norm = ',dsqrt(pt2_data % overlap(1,1)) + print*,'E(before) + PT2 = ',E_tc + (pt2_data % pt2(1))/norm + print*,'PT2 = ',pt2_data % pt2(1) + print*,'Ndet, E_tc, E+PT2 = ',ndet,E_tc,E_tc + (pt2_data % pt2(1))/norm,dsqrt(pt2_data % overlap(1,1)) + print*,'*****' endif + E_tc = eigval_right_tc_bi_orth(1) norm = norm_ground_left_right_bi_orth ndet = N_det - do j=1,N_states - do i=1,N_det + + do j = 1, N_states + do i = 1, N_det psi_coef(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo @@ -100,3 +117,5 @@ subroutine print_CI_dressed(ndet, E_tc,norm,pt2_data,print_pt2) end +! --- + diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index 3e6f229b..cf3361bf 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -41,16 +41,20 @@ program fci my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + pruning = -1.d0 touch pruning + ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error - call run_cipsi_tc + + call run_cipsi_tc() end +! --- -subroutine run_cipsi_tc +subroutine run_cipsi_tc() implicit none @@ -59,19 +63,20 @@ subroutine run_cipsi_tc PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e if(elec_alpha_num+elec_beta_num .ge. 3) then - if(three_body_h_tc)then + if(three_body_h_tc) then call provide_all_three_ints_bi_ortho() endif endif + FREE int2_grad1_u12_ao FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp write(json_unit,json_array_open_fmt) 'fci_tc' if (do_pt2) then - call run_stochastic_cipsi + call run_stochastic_cipsi() else - call run_cipsi + call run_cipsi() endif write(json_unit,json_dict_uopen_fmt) @@ -83,12 +88,13 @@ subroutine run_cipsi_tc PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks - if(elec_alpha_num+elec_beta_num.ge.3)then - if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + if(elec_alpha_num+elec_beta_num .ge. 3) then + if(three_body_h_tc) then + call provide_all_three_ints_bi_ortho() endif endif + FREE int2_grad1_u12_ao FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp call run_slave_cipsi diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index ce65b203..d569b25c 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -70,7 +70,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b + PROVIDE v_1b_grad + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b int2_grad1_u12_ao = 0.d0 !$OMP PARALLEL & diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 49977f37..e6e78534 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -14,81 +14,89 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) integer :: occ(Nint*bit_kind_size,2) integer :: Ne(2),i,j,ii,jj,ispin,jspin,m,mm integer(bit_kind) :: key_i_core(Nint,2) - double precision :: direct_int, exchange_int - double precision :: sym_3_e_int_from_6_idx_tensor - double precision :: three_e_diag_parrallel_spin + double precision :: direct_int, exchange_int, ref + double precision, external :: sym_3_e_int_from_6_idx_tensor + double precision, external :: three_e_diag_parrallel_spin - if(core_tc_op)then - do i = 1, Nint - key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,Nint) + PROVIDE mo_l_coef mo_r_coef + + if(core_tc_op) then + do i = 1, Nint + key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core,occ,Ne,Nint) else - call bitstring_to_list_ab(key_i,occ,Ne,Nint) + call bitstring_to_list_ab(key_i,occ,Ne,Nint) endif + hthree = 0.d0 - if(Ne(1)+Ne(2).ge.3)then -!! ! alpha/alpha/beta three-body - do i = 1, Ne(1) - ii = occ(i,1) - do j = i+1, Ne(1) - jj = occ(j,1) - do m = 1, Ne(2) - mm = occ(m,2) -! direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) USES THE 6-IDX TENSOR -! exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) USES THE 6-IDX TENSOR - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR - hthree += direct_int - exchange_int - enddo - enddo - enddo + if((Ne(1)+Ne(2)) .ge. 3) then - ! beta/beta/alpha three-body - do i = 1, Ne(2) - ii = occ(i,2) - do j = i+1, Ne(2) - jj = occ(j,2) - do m = 1, Ne(1) - mm = occ(m,1) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) - hthree += direct_int - exchange_int - enddo + ! alpha/alpha/beta three-body + do i = 1, Ne(1) + ii = occ(i,1) + do j = i+1, Ne(1) + jj = occ(j,1) + do m = 1, Ne(2) + mm = occ(m,2) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) !uses 3-idx tensor + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) !uses 3-idx tensor + hthree += direct_int - exchange_int + enddo + enddo enddo - enddo - ! alpha/alpha/alpha three-body - do i = 1, Ne(1) - ii = occ(i,1) ! 1 - do j = i+1, Ne(1) - jj = occ(j,1) ! 2 - do m = j+1, Ne(1) - mm = occ(m,1) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR - hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS - enddo + ! beta/beta/alpha three-body + do i = 1, Ne(2) + ii = occ(i,2) + do j = i+1, Ne(2) + jj = occ(j,2) + do m = 1, Ne(1) + mm = occ(m,1) + !direct_int = three_body_ints_bi_ort(mm,jj,ii,mm,jj,ii) !uses the 6-idx tensor + !exchange_int = three_body_ints_bi_ort(mm,jj,ii,mm,ii,jj) !uses the 6-idx tensor + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) + hthree += direct_int - exchange_int + enddo + enddo enddo - enddo - ! beta/beta/beta three-body - do i = 1, Ne(2) - ii = occ(i,2) ! 1 - do j = i+1, Ne(2) - jj = occ(j,2) ! 2 - do m = j+1, Ne(2) - mm = occ(m,2) ! 3 -! ref = sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR - hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS - enddo + ! alpha/alpha/alpha three-body + do i = 1, Ne(1) + ii = occ(i,1) ! 1 + do j = i+1, Ne(1) + jj = occ(j,1) ! 2 + do m = j+1, Ne(1) + mm = occ(m,1) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo enddo - enddo + + ! beta/beta/beta three-body + do i = 1, Ne(2) + ii = occ(i,2) ! 1 + do j = i+1, Ne(2) + jj = occ(j,2) ! 2 + do m = j+1, Ne(2) + mm = occ(m,2) ! 3 + !hthree += sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) !uses the 6 idx tensor + hthree += three_e_diag_parrallel_spin(mm,jj,ii) !uses only 3-idx tensors + enddo + enddo + enddo + endif end +! --- subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index ceefbfb8..d1c1d45d 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,4 @@ + ! --- subroutine provide_all_three_ints_bi_ortho() @@ -25,9 +26,9 @@ subroutine provide_all_three_ints_bi_ortho() PROVIDE normal_two_body_bi_orth endif - endif + endif - return + return end ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 531f0141..5c156a4d 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -1,32 +1,48 @@ + +! --- + BEGIN_PROVIDER [ double precision, ref_tc_energy_tot] &BEGIN_PROVIDER [ double precision, ref_tc_energy_1e] &BEGIN_PROVIDER [ double precision, ref_tc_energy_2e] &BEGIN_PROVIDER [ double precision, ref_tc_energy_3e] - implicit none - BEGIN_DOC -! Various component of the TC energy for the reference "HF" Slater determinant - END_DOC - double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) - ref_tc_energy_1e = hmono - ref_tc_energy_2e = htwoe - if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) - ref_tc_energy_3e = hthree - else - ref_tc_energy_3e = 0.d0 - endif - ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion - END_PROVIDER + + BEGIN_DOC + ! Various component of the TC energy for the reference "HF" Slater determinant + END_DOC + + implicit none + double precision :: hmono, htwoe, htot, hthree + + PROVIDE mo_l_coef mo_r_coef + + call diag_htilde_mu_mat_bi_ortho_slow(N_int, HF_bitmask, hmono, htwoe, htot) + + ref_tc_energy_1e = hmono + ref_tc_energy_2e = htwoe + + if(three_body_h_tc) then + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) + ref_tc_energy_3e = hthree + else + ref_tc_energy_3e = 0.d0 + endif + + ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion + +END_PROVIDER + +! --- subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot) - implicit none + BEGIN_DOC ! Computes $\langle i|H|i \rangle$. END_DOC - integer,intent(in) :: Nint - integer(bit_kind),intent(in) :: det_in(Nint,2) - double precision, intent(out) :: hmono,htwoe,htot,hthree + + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det_in(Nint,2) + double precision, intent(out) :: hmono, htwoe, htot, hthree integer(bit_kind) :: hole(Nint,2) integer(bit_kind) :: particle(Nint,2) @@ -40,7 +56,6 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num) ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num) - nexc(1) = 0 nexc(2) = 0 do i=1,Nint @@ -55,15 +70,15 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, enddo if (nexc(1)+nexc(2) == 0) then - hmono = ref_tc_energy_1e - htwoe = ref_tc_energy_2e - hthree= ref_tc_energy_3e - htot = ref_tc_energy_tot + hmono = ref_tc_energy_1e + htwoe = ref_tc_energy_2e + hthree = ref_tc_energy_3e + htot = ref_tc_energy_tot return endif !call debug_det(det_in,Nint) - integer :: tmp(2) + integer :: tmp(2) !DIR$ FORCEINLINE call bitstring_to_list_ab(particle, occ_particle, tmp, Nint) ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha @@ -73,27 +88,31 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha ASSERT (tmp(2) == nexc(2)) ! Number of holes beta - + hmono = ref_tc_energy_1e + htwoe = ref_tc_energy_2e + hthree = ref_tc_energy_3e + det_tmp = ref_bitmask - hmono = ref_tc_energy_1e - htwoe = ref_tc_energy_2e - hthree= ref_tc_energy_3e - do ispin=1,2 + + do ispin = 1, 2 na = elec_num_tab(ispin) nb = elec_num_tab(iand(ispin,1)+1) - do i=1,nexc(ispin) + do i = 1, nexc(ispin) !DIR$ FORCEINLINE - call ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb) + call ac_tc_operator(occ_particle(i,ispin), ispin, det_tmp, hmono, htwoe, hthree, Nint, na, nb) !DIR$ FORCEINLINE - call a_tc_operator ( occ_hole (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb) + call a_tc_operator (occ_hole (i,ispin), ispin, det_tmp, hmono, htwoe, hthree, Nint, na, nb) enddo enddo - htot = hmono+htwoe+hthree+nuclear_repulsion + + htot = hmono + htwoe + hthree + nuclear_repulsion + end +! --- + subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) - use bitmasks - implicit none + BEGIN_DOC ! Routine that computes one- and two-body energy corresponding ! @@ -105,6 +124,9 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) ! ! and the quantities hmono,htwoe,hthree are INCREMENTED END_DOC + + use bitmasks + implicit none integer, intent(in) :: iorb, ispin, Nint integer, intent(inout) :: na, nb integer(bit_kind), intent(inout) :: key(Nint,2) @@ -460,14 +482,16 @@ subroutine a_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb) hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb) ! Same spin - do i=1,na - htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb) + do i = 1, na + htwoe = htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb) enddo ! Opposite spin - do i=1,nb - htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) + do i = 1, nb + htwoe = htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo end +! --- + diff --git a/src/tc_bi_ortho/slater_tc_slow.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f index 301cfe0f..083d4ba3 100644 --- a/src/tc_bi_ortho/slater_tc_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -106,6 +106,8 @@ subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) double precision :: get_mo_two_e_integral_tc_int integer(bit_kind) :: key_i_core(Nint,2) + PROVIDE mo_bi_ortho_tc_two_e + ! PROVIDE mo_two_e_integrals_tc_int_in_map mo_bi_ortho_tc_two_e ! ! PROVIDE mo_integrals_erf_map core_energy nuclear_repulsion core_bitmask @@ -135,15 +137,6 @@ subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) ii = occ(i,ispin) hmono += mo_bi_ortho_tc_one_e(ii,ii) -! if(j1b_gauss .eq. 1) then -! print*,'j1b not implemented for bi ortho TC' -! print*,'stopping ....' -! stop -! !hmono += mo_j1b_gauss_hermI (ii,ii) & -! ! + mo_j1b_gauss_hermII (ii,ii) & -! ! + mo_j1b_gauss_nonherm(ii,ii) -! endif - ! if(core_tc_op)then ! print*,'core_tc_op not already taken into account for bi ortho' ! print*,'stopping ...' diff --git a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f index 3180d946..66360c36 100644 --- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f +++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f @@ -41,14 +41,21 @@ subroutine give_all_perm_for_three_e(n,l,k,m,j,i,idx_list,phase) end -double precision function sym_3_e_int_from_6_idx_tensor(n,l,k,m,j,i) - implicit none - BEGIN_DOC - ! returns all good combinations of permutations of integrals with the good signs - ! - ! for a given (k^dagger l^dagger n^dagger m j i) when all indices have the same spins - END_DOC - integer, intent(in) :: n,l,k,m,j,i +! --- + +double precision function sym_3_e_int_from_6_idx_tensor(n, l, k, m, j, i) + + BEGIN_DOC + ! returns all good combinations of permutations of integrals with the good signs + ! + ! for a given (k^dagger l^dagger n^dagger m j i) when all indices have the same spins + END_DOC + + implicit none + integer, intent(in) :: n, l, k, m, j, i + + PROVIDE mo_l_coef mo_r_coef + sym_3_e_int_from_6_idx_tensor = three_body_ints_bi_ort(n,l,k,m,j,i) & ! direct + three_body_ints_bi_ort(n,l,k,j,i,m) & ! 1st cyclic permutation + three_body_ints_bi_ort(n,l,k,i,m,j) & ! 2nd cyclic permutation @@ -56,8 +63,11 @@ double precision function sym_3_e_int_from_6_idx_tensor(n,l,k,m,j,i) - three_body_ints_bi_ort(n,l,k,i,j,m) & ! elec 2 is kept fixed - three_body_ints_bi_ort(n,l,k,m,i,j) ! elec 3 is kept fixed + return end +! --- + double precision function direct_sym_3_e_int(n,l,k,m,j,i) implicit none BEGIN_DOC @@ -83,15 +93,25 @@ double precision function direct_sym_3_e_int(n,l,k,m,j,i) end -double precision function three_e_diag_parrallel_spin(m,j,i) - implicit none - integer, intent(in) :: i,j,m +! --- + +double precision function three_e_diag_parrallel_spin(m, j, i) + + implicit none + integer, intent(in) :: i, j, m + + PROVIDE mo_l_coef mo_r_coef + three_e_diag_parrallel_spin = three_e_3_idx_direct_bi_ort(m,j,i) ! direct three_e_diag_parrallel_spin += three_e_3_idx_cycle_1_bi_ort(m,j,i) + three_e_3_idx_cycle_2_bi_ort(m,j,i) & ! two cyclic permutations - - three_e_3_idx_exch23_bi_ort(m,j,i) - three_e_3_idx_exch13_bi_ort(m,j,i) & ! two first exchange - - three_e_3_idx_exch12_bi_ort(m,j,i) ! last exchange + - three_e_3_idx_exch23_bi_ort (m,j,i) - three_e_3_idx_exch13_bi_ort(m,j,i) & ! two first exchange + - three_e_3_idx_exch12_bi_ort (m,j,i) ! last exchange + + return end +! --- + double precision function three_e_single_parrallel_spin(m,j,k,i) implicit none integer, intent(in) :: i,k,j,m From b39daa53c4d64fec77837e1a11279345f944abfe Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 2 Jul 2023 21:49:25 +0200 Subject: [PATCH 198/337] added keywords for r1 grid --- .../grad_lapl_jmu_modif.irp.f | 16 ++- src/bi_ort_ints/bi_ort_ints.irp.f | 7 +- src/bi_ort_ints/total_twoe_pot.irp.f | 62 +++++----- src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f | 86 +++++++------- src/fci_tc_bi/diagonalize_ci.irp.f | 4 +- src/fci_tc_bi/fci_tc_bi_ortho.irp.f | 27 +++-- src/fci_tc_bi/pt2_tc.irp.f | 31 +++-- src/non_h_ints_mu/debug_fit.irp.f | 10 +- src/non_h_ints_mu/debug_integ_jmu_modif.irp.f | 19 ++-- src/non_h_ints_mu/new_grad_tc.irp.f | 106 ------------------ src/non_h_ints_mu/test_non_h_ints.irp.f | 17 ++- src/tc_bi_ortho/compute_deltamu_right.irp.f | 5 +- src/tc_bi_ortho/print_tc_dump.irp.f | 7 +- src/tc_bi_ortho/print_tc_energy.irp.f | 16 +-- src/tc_bi_ortho/print_tc_spin_dens.irp.f | 24 ++-- src/tc_bi_ortho/print_tc_var.irp.f | 9 +- src/tc_bi_ortho/print_tc_wf.irp.f | 23 +++- src/tc_bi_ortho/pt2_tc_cisd.irp.f | 10 +- src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f | 74 +++++++----- src/tc_bi_ortho/select_dets_bi_ortho.irp.f | 27 +++-- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 10 +- src/tc_bi_ortho/slater_tc_opt.irp.f | 24 ++-- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 79 ++++++------- src/tc_bi_ortho/slater_tc_slow.irp.f | 14 +-- src/tc_bi_ortho/tc_bi_ortho.irp.f | 41 ++++--- src/tc_bi_ortho/tc_bi_ortho_prop.irp.f | 27 +++-- src/tc_bi_ortho/tc_cisd_sc2.irp.f | 26 +++-- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 88 +++++++++------ src/tc_bi_ortho/tc_hmat.irp.f | 4 +- src/tc_bi_ortho/tc_som.irp.f | 7 +- src/tc_bi_ortho/test_natorb.irp.f | 29 +++-- src/tc_bi_ortho/test_normal_order.irp.f | 27 +++-- src/tc_bi_ortho/test_s2_tc.irp.f | 28 +++-- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 19 +++- src/tc_bi_ortho/test_tc_fock.irp.f | 22 +++- src/tc_keywords/EZFIO.cfg | 13 +++ src/tc_scf/combine_lr_tcscf.irp.f | 5 +- src/tc_scf/minimize_tc_angles.irp.f | 25 +++-- src/tc_scf/molden_lr_mos.irp.f | 7 +- src/tc_scf/print_fit_param.irp.f | 7 +- src/tc_scf/print_tcscf_energy.irp.f | 13 +-- src/tc_scf/rotate_tcscf_orbitals.irp.f | 5 +- src/tc_scf/tc_petermann_factor.irp.f | 7 +- src/tc_scf/tc_scf.irp.f | 10 +- src/tc_scf/test_int.irp.f | 11 +- 45 files changed, 608 insertions(+), 520 deletions(-) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 9d34e1d7..18d71134 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -195,7 +195,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_fit, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! @@ -212,14 +212,14 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ double precision, external :: overlap_gauss_r12_ao_with1s - print*, ' providing v_ij_u_cst_mu_j1b ...' + print*, ' providing v_ij_u_cst_mu_j1b_fit ...' call wall_time(wall0) provide mu_erf final_grid_points j1b_pen PROVIDE ng_fit_jast expo_gauss_j_mu_x coef_gauss_j_mu_x PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent - v_ij_u_cst_mu_j1b = 0.d0 + v_ij_u_cst_mu_j1b_fit = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & @@ -228,7 +228,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & - !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_fit) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -278,7 +278,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ enddo - v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp + v_ij_u_cst_mu_j1b_fit(j,i,ipoint) = tmp enddo enddo enddo @@ -288,13 +288,13 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ do ipoint = 1, n_points_final_grid do i = 2, ao_num do j = 1, i-1 - v_ij_u_cst_mu_j1b(j,i,ipoint) = v_ij_u_cst_mu_j1b(i,j,ipoint) + v_ij_u_cst_mu_j1b_fit(j,i,ipoint) = v_ij_u_cst_mu_j1b_fit(i,j,ipoint) enddo enddo enddo call wall_time(wall1) - print*, ' wall time for v_ij_u_cst_mu_j1b', wall1 - wall0 + print*, ' wall time for v_ij_u_cst_mu_j1b_fit', wall1 - wall0 END_PROVIDER @@ -327,7 +327,6 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin call wall_time(wall0) provide mu_erf final_grid_points j1b_pen - PROVIDE ng_fit_jast expo_gauss_j_mu_x coef_gauss_j_mu_x PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent ct = inv_sq_pi_2 / mu_erf @@ -340,7 +339,6 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin !$OMP int_e2, int_c3, int_e3) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & !$OMP final_grid_points, mu_erf, ct, & - !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) !$OMP DO diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index 2ff96326..cac46b18 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -9,10 +9,9 @@ program bi_ort_ints implicit none my_grid_becke = .True. - !my_n_pt_r_grid = 10 - !my_n_pt_a_grid = 14 - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_3e diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index c1bacbd0..f03e8a34 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -261,51 +261,55 @@ END_PROVIDER ! --- - BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_exchange, (mo_num,mo_num) ] -&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num) ] - implicit none + BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num)] +&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_exchange, (mo_num,mo_num)] +&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num)] + BEGIN_DOC ! mo_bi_ortho_tc_two_e_jj(i,j) = J_ij = ! mo_bi_ortho_tc_two_e_jj_exchange(i,j) = K_ij = ! mo_bi_ortho_tc_two_e_jj_anti(i,j) = J_ij - K_ij END_DOC - integer :: i,j - double precision :: get_two_e_integral + implicit none + integer :: i, j - mo_bi_ortho_tc_two_e_jj = 0.d0 + mo_bi_ortho_tc_two_e_jj = 0.d0 mo_bi_ortho_tc_two_e_jj_exchange = 0.d0 - do i=1,mo_num - do j=1,mo_num - mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i) + do i = 1, mo_num + do j = 1, mo_num + mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i) mo_bi_ortho_tc_two_e_jj_exchange(i,j) = mo_bi_ortho_tc_two_e(i,j,j,i) - mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j) + mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j) enddo enddo END_PROVIDER - BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)] -&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)] - implicit none - BEGIN_DOC - ! tc_2e_3idx_coulomb_integrals(j,k,i) = - ! - ! tc_2e_3idx_exchange_integrals(j,k,i) = - END_DOC - integer :: i,j,k,l - double precision :: get_two_e_integral - double precision :: integral +! --- - do i = 1, mo_num - do k = 1, mo_num - do j = 1, mo_num - tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) - tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) - enddo + BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals , (mo_num,mo_num,mo_num)] +&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals, (mo_num,mo_num,mo_num)] + + BEGIN_DOC + ! tc_2e_3idx_coulomb_integrals (j,k,i) = + ! tc_2e_3idx_exchange_integrals(j,k,i) = + END_DOC + + implicit none + integer :: i, j, k + + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) + tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) + enddo + enddo enddo - enddo END_PROVIDER + +! --- + diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f index a06f28e9..66d82964 100644 --- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f +++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f @@ -1,47 +1,54 @@ + +! --- + subroutine run_stochastic_cipsi + + BEGIN_DOC + ! Selected Full Configuration Interaction with Stochastic selection and PT2. + END_DOC + use selection_types implicit none - BEGIN_DOC -! Selected Full Configuration Interaction with Stochastic selection and PT2. - END_DOC - integer :: i,j,k,ndet - double precision, allocatable :: zeros(:) - integer :: to_select - type(pt2_type) :: pt2_data, pt2_data_err - logical, external :: qp_stop - logical :: print_pt2 + integer :: i, j, k, ndet + integer :: to_select + logical :: print_pt2 + logical :: has + type(pt2_type) :: pt2_data, pt2_data_err + double precision :: rss + double precision :: correlation_energy_ratio, E_denom, E_tc, norm + double precision :: hf_energy_ref + double precision :: relative_error + double precision, allocatable :: ept2(:), pt1(:), extrap_energy(:) + double precision, allocatable :: zeros(:) - double precision :: rss - double precision, external :: memory_of_double - double precision :: correlation_energy_ratio,E_denom,E_tc,norm - double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:) + logical, external :: qp_stop + double precision, external :: memory_of_double + + PROVIDE mo_l_coef mo_r_coef PROVIDE H_apply_buffer_allocated distributed_davidson - print*,'Diagonal elements of the Fock matrix ' + print*, ' Diagonal elements of the Fock matrix ' do i = 1, mo_num - write(*,*)i,Fock_matrix_tc_mo_tot(i,i) + write(*,*) i, Fock_matrix_tc_mo_tot(i,i) enddo + N_iter = 1 threshold_generators = 1.d0 SOFT_TOUCH threshold_generators rss = memory_of_double(N_states)*4.d0 - call check_mem(rss,irp_here) + call check_mem(rss, irp_here) - allocate (zeros(N_states)) + allocate(zeros(N_states)) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) - double precision :: hf_energy_ref - logical :: has - double precision :: relative_error + relative_error = PT2_relative_error - relative_error=PT2_relative_error - - zeros = 0.d0 - pt2_data % pt2 = -huge(1.e0) - pt2_data % rpt2 = -huge(1.e0) - pt2_data % overlap= 0.d0 + zeros = 0.d0 + pt2_data % pt2 = -huge(1.e0) + pt2_data % rpt2 = -huge(1.e0) + pt2_data % overlap = 0.d0 pt2_data % variance = huge(1.e0) !!!! WARNING !!!! SEEMS TO BE PROBLEM WTH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION @@ -49,7 +56,7 @@ subroutine run_stochastic_cipsi ! call make_s2_eigenfunction ! endif print_pt2 = .False. - call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + call diagonalize_CI_tc_bi_ortho(ndet, E_tc, norm, pt2_data, print_pt2) ! call routine_save_right @@ -74,14 +81,12 @@ subroutine run_stochastic_cipsi ! soft_touch thresh_it_dav print_pt2 = .True. - do while ( & - (N_det < N_det_max) .and. & - (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) & - ) - print*,'maxval(abs(pt2_data % pt2(1:N_states)))',maxval(abs(pt2_data % pt2(1:N_states))) - print*,pt2_max - write(*,'(A)') '--------------------------------------------------------------------------------' + do while( (N_det < N_det_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)) + print*,'maxval(abs(pt2_data % pt2(1:N_states)))',maxval(abs(pt2_data % pt2(1:N_states))) + print*,pt2_max + write(*,'(A)') '--------------------------------------------------------------------------------' to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) to_select = max(N_states_diag, to_select) @@ -94,8 +99,7 @@ subroutine run_stochastic_cipsi call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection ! stop - call print_summary(psi_energy_with_nucl_rep, & - pt2_data, pt2_data_err, N_det,N_configuration,N_states,psi_s2) + call print_summary(psi_energy_with_nucl_rep, pt2_data, pt2_data_err, N_det, N_configuration, N_states, psi_s2) call save_energy(psi_energy_with_nucl_rep, pt2_data % pt2) @@ -109,13 +113,13 @@ subroutine run_stochastic_cipsi ! Add selected determinants call copy_H_apply_buffer_to_wf_tc() - PROVIDE psi_l_coef_bi_ortho psi_r_coef_bi_ortho - PROVIDE psi_det - PROVIDE psi_det_sorted_tc + PROVIDE psi_l_coef_bi_ortho psi_r_coef_bi_ortho + PROVIDE psi_det + PROVIDE psi_det_sorted_tc ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm - pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1)) - call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) + pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1)) + call diagonalize_CI_tc_bi_ortho(ndet, E_tc, norm, pt2_data, print_pt2) ! stop if (qp_stop()) exit enddo diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index 8b59785a..6c8f3431 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -17,6 +17,8 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc, norm, pt2_data, print_pt2) integer :: i, j double precision :: pt2_tmp, pt1_norm, rpt2_tmp, abs_pt2 + PROVIDE mo_l_coef mo_r_coef + pt2_tmp = pt2_data % pt2(1) abs_pt2 = pt2_data % variance(1) pt1_norm = pt2_data % overlap(1,1) @@ -60,7 +62,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc, norm, pt2_data, print_pt2) psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j)) enddo enddo - SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth + SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef psi_energy psi_s2 call save_tc_bi_ortho_wavefunction() diff --git a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f index cf3361bf..1c1c0411 100644 --- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f +++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f @@ -1,5 +1,8 @@ -program fci - implicit none + +! --- + +program fci_tc_bi + BEGIN_DOC ! Selected Full Configuration Interaction with stochastic selection ! and PT2. @@ -36,10 +39,12 @@ program fci ! END_DOC + implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid pruning = -1.d0 @@ -62,18 +67,18 @@ subroutine run_cipsi_tc() PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num .ge. 3) then + if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then call provide_all_three_ints_bi_ortho() endif endif - FREE int2_grad1_u12_ao - FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + FREE int2_grad1_u12_ao int2_grad1_u12_ao_t int2_grad1_u12_ao_transp + FREE int2_grad1_u12_bimo_transp write(json_unit,json_array_open_fmt) 'fci_tc' - if (do_pt2) then + if(do_pt2) then call run_stochastic_cipsi() else call run_cipsi() @@ -88,14 +93,14 @@ subroutine run_cipsi_tc() PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks - if(elec_alpha_num+elec_beta_num .ge. 3) then + if((elec_alpha_num+elec_beta_num) .ge. 3) then if(three_body_h_tc) then call provide_all_three_ints_bi_ortho() endif endif - FREE int2_grad1_u12_ao - FREE int2_grad1_u12_bimo_transp int2_grad1_u12_ao_transp + FREE int2_grad1_u12_ao int2_grad1_u12_ao_t int2_grad1_u12_ao_transp + FREE int2_grad1_u12_bimo_transp call run_slave_cipsi diff --git a/src/fci_tc_bi/pt2_tc.irp.f b/src/fci_tc_bi/pt2_tc.irp.f index 96a54825..390042bf 100644 --- a/src/fci_tc_bi/pt2_tc.irp.f +++ b/src/fci_tc_bi/pt2_tc.irp.f @@ -1,31 +1,42 @@ + +! --- + program tc_pt2_prog + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + pruning = -1.d0 touch pruning + ! pt2_relative_error = 0.01d0 ! touch pt2_relative_error - call run_pt2_tc + call run_pt2_tc() end +! --- -subroutine run_pt2_tc +subroutine run_pt2_tc() - implicit none + implicit none PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e - if(elec_alpha_num+elec_beta_num.ge.3)then + + if(elec_alpha_num+elec_beta_num.ge.3) then if(three_body_h_tc)then - call provide_all_three_ints_bi_ortho + call provide_all_three_ints_bi_ortho() endif endif - ! --- - - call tc_pt2 + call tc_pt2() end + +! --- + diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index 146028d5..05d2db68 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -6,13 +6,9 @@ program debug_fit implicit none my_grid_becke = .True. - - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 170 - !my_n_pt_r_grid = 150 - !my_n_pt_a_grid = 194 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf j1b_pen diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f index f3e93360..b9e8df25 100644 --- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f +++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f @@ -6,13 +6,9 @@ program debug_integ_jmu_modif implicit none my_grid_becke = .True. - - !my_n_pt_r_grid = 30 - !my_n_pt_a_grid = 50 - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 170 - my_n_pt_r_grid = 150 - my_n_pt_a_grid = 194 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf j1b_pen @@ -48,22 +44,21 @@ subroutine test_v_ij_u_cst_mu_j1b() print*, ' test_v_ij_u_cst_mu_j1b ...' - PROVIDE v_ij_u_cst_mu_j1b + PROVIDE v_ij_u_cst_mu_j1b_fit eps_ij = 1d-3 acc_tot = 0.d0 normalz = 0.d0 - !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid do j = 1, ao_num do i = 1, ao_num - i_exc = v_ij_u_cst_mu_j1b(i,j,ipoint) - i_num = num_v_ij_u_cst_mu_j1b(i,j,ipoint) + i_exc = v_ij_u_cst_mu_j1b_fit(i,j,ipoint) + i_num = num_v_ij_u_cst_mu_j1b (i,j,ipoint) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then - print *, ' problem in v_ij_u_cst_mu_j1b on', i, j, ipoint + print *, ' problem in v_ij_u_cst_mu_j1b_fit on', i, j, ipoint print *, ' analyt integ = ', i_exc print *, ' numeri integ = ', i_num print *, ' diff = ', acc_ij diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 499ffe9d..dc76431d 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -1,68 +1,3 @@ -! --- - -!BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] -! -! BEGIN_DOC -! ! -! ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) -! ! -! ! where r1 = r(ipoint) -! ! -! ! if J(r1,r2) = u12: -! ! -! ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) -! ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] -! ! = -int2_grad1_u12_ao(i,j,ipoint,:) -! ! -! ! if J(r1,r2) = u12 x v1 x v2 -! ! -! ! int1_grad2_u12_ao(:,i,j,ipoint) = v2 x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ] -! ! - \grad_2 v2 x [ \int dr1 u12 v1 \phi_i(r1) \phi_j(r1) ] -! ! = -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) -! ! + 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) -! ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) -! ! -! ! -! END_DOC -! -! implicit none -! integer :: ipoint, i, j -! double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 -! -! PROVIDE j1b_type -! -! if(j1b_type .eq. 3) then -! -! do ipoint = 1, n_points_final_grid -! x = final_grid_points(1,ipoint) -! y = final_grid_points(2,ipoint) -! z = final_grid_points(3,ipoint) -! -! tmp0 = 0.5d0 * v_1b(ipoint) -! tmp_x = v_1b_grad(1,ipoint) -! tmp_y = v_1b_grad(2,ipoint) -! tmp_z = v_1b_grad(3,ipoint) -! -! do j = 1, ao_num -! do i = 1, ao_num -! -! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) -! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) -! -! int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x -! int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y -! int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z -! enddo -! enddo -! enddo -! -! else -! -! int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao -! -! endif -! -!END_PROVIDER ! --- @@ -98,22 +33,14 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_ weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) do i = 1, ao_num - !ao_i_r = weight1 * aos_in_r_array_transp (ipoint,i) - !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1) - !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2) - !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3) ao_i_r = weight1 * aos_in_r_array (i,ipoint) ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1) ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2) ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3) do k = 1, ao_num - !ao_k_r = aos_in_r_array_transp(ipoint,k) ao_k_r = aos_in_r_array(k,ipoint) - !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) - !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) - !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) @@ -134,44 +61,11 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_ ! --- - !do ipoint = 1, n_points_final_grid - ! weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) - - ! do l = 1, ao_num - ! ao_l_r = weight1 * aos_in_r_array_transp (ipoint,l) - ! ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1) - ! ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2) - ! ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3) - - ! do j = 1, ao_num - ! ao_j_r = aos_in_r_array_transp(ipoint,j) - - ! tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) - ! tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) - ! tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) - - ! do i = 1, ao_num - ! do k = 1, ao_num - - ! contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x - ! contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y - ! contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z - - ! ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z - ! enddo - ! enddo - ! enddo - ! enddo - !enddo - - ! --- - do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) enddo enddo enddo diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index a6e0a311..aff53c2d 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -1,19 +1,18 @@ + +! --- + program test_non_h implicit none my_grid_becke = .True. - my_n_pt_r_grid = 50 - my_n_pt_a_grid = 74 - !my_n_pt_r_grid = 400 - !my_n_pt_a_grid = 974 - -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - !call routine_grad_squared - !call routine_fit + !call routine_grad_squared() + !call routine_fit() call test_ipp() end diff --git a/src/tc_bi_ortho/compute_deltamu_right.irp.f b/src/tc_bi_ortho/compute_deltamu_right.irp.f index 7ca2c890..ab9dc093 100644 --- a/src/tc_bi_ortho/compute_deltamu_right.irp.f +++ b/src/tc_bi_ortho/compute_deltamu_right.irp.f @@ -3,8 +3,9 @@ program compute_deltamu_right implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid read_wf = .True. diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 0a7e08d2..868de444 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -6,10 +6,9 @@ program tc_bi_ortho implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 170 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call ERI_dump() diff --git a/src/tc_bi_ortho/print_tc_energy.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f index 522f4cd7..7bca72a1 100644 --- a/src/tc_bi_ortho/print_tc_energy.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -7,16 +7,12 @@ program print_tc_energy implicit none print *, 'Hello world' + my_grid_becke = .True. - - !my_n_pt_r_grid = 30 - !my_n_pt_a_grid = 50 - - my_n_pt_r_grid = 100 - my_n_pt_a_grid = 170 - - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 266 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid read_wf = .True. touch read_wf @@ -24,8 +20,6 @@ program print_tc_energy PROVIDE j1b_type print*, 'j1b_type = ', j1b_type - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call write_tc_energy() end diff --git a/src/tc_bi_ortho/print_tc_spin_dens.irp.f b/src/tc_bi_ortho/print_tc_spin_dens.irp.f index 8308140d..c7da5bc8 100644 --- a/src/tc_bi_ortho/print_tc_spin_dens.irp.f +++ b/src/tc_bi_ortho/print_tc_spin_dens.irp.f @@ -1,16 +1,26 @@ + +! --- + program test_spin_dens - implicit none + BEGIN_DOC -! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call tc_print_mulliken_sd -! call test + + call tc_print_mulliken_sd() + !call test end diff --git a/src/tc_bi_ortho/print_tc_var.irp.f b/src/tc_bi_ortho/print_tc_var.irp.f index fa0a4363..bec34f18 100644 --- a/src/tc_bi_ortho/print_tc_var.irp.f +++ b/src/tc_bi_ortho/print_tc_var.irp.f @@ -7,12 +7,15 @@ program print_tc_var implicit none print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call write_tc_var() diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 0c4198a9..c755485b 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -1,20 +1,31 @@ + +! --- + program print_tc_bi_ortho - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + ! if(three_body_h_tc)then ! call provide_all_three_ints_bi_ortho ! endif ! call routine - call write_l_r_wf + call write_l_r_wf + end subroutine write_l_r_wf diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f index 9cb9a600..8940a4f6 100644 --- a/src/tc_bi_ortho/pt2_tc_cisd.irp.f +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -7,12 +7,16 @@ program pt2_tc_cisd ! END_DOC + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid print*, ' nb of states = ', N_states print*, ' nb of det = ', N_det diff --git a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f index 8b6eb1d1..47ade8df 100644 --- a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f +++ b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f @@ -1,35 +1,59 @@ - program tc_natorb_bi_ortho - implicit none - BEGIN_DOC - ! TODO : Put the documentation of the program here - END_DOC - print *, 'Hello world' - my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 - read_wf = .True. - touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call print_energy_and_mos - call save_tc_natorb -! call minimize_tc_orb_angles - end - - subroutine save_tc_natorb + +! --- + +program tc_natorb_bi_ortho + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + implicit none + + print *, 'Hello world' + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + call print_energy_and_mos() + call save_tc_natorb() + !call minimize_tc_orb_angles() + +end + +! --- + +subroutine save_tc_natorb() + + implicit none + print*,'Saving the natorbs ' + provide natorb_tc_leigvec_ao natorb_tc_reigvec_ao + call ezfio_set_bi_ortho_mos_mo_l_coef(natorb_tc_leigvec_ao) call ezfio_set_bi_ortho_mos_mo_r_coef(natorb_tc_reigvec_ao) - call save_ref_determinant_nstates_1 + call save_ref_determinant_nstates_1() call ezfio_set_determinants_read_wf(.False.) - end + +end + +! --- - subroutine save_ref_determinant_nstates_1 - implicit none +subroutine save_ref_determinant_nstates_1() + use bitmasks - double precision :: buffer(1,N_states) + implicit none + double precision :: buffer(1,N_states) + buffer = 0.d0 buffer(1,1) = 1.d0 - call save_wavefunction_general(1,1,ref_bitmask,1,buffer) - end + call save_wavefunction_general(1, 1, ref_bitmask, 1, buffer) + +end + diff --git a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f index e6bf3d6e..e923548a 100644 --- a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f +++ b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f @@ -1,15 +1,24 @@ -program tc_bi_ortho - implicit none + +! --- + +program select_dets_bi_ortho() + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid !!!!!!!!!!!!!!! WARNING NO 3-BODY !!!!!!!!!!!!!!! WARNING NO 3-BODY @@ -22,6 +31,8 @@ program tc_bi_ortho ! call test end +! --- + subroutine routine_test implicit none use bitmasks ! you need to include the bitmasks_module.f90 features @@ -57,5 +68,7 @@ subroutine routine_test enddo call save_wavefunction_general(n_good,n_states,dets,n_good,coef_new) - end + +! --- + diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index e6e78534..3a2e3606 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,4 +1,6 @@ +! --- + subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC @@ -22,12 +24,12 @@ subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) if(core_tc_op) then do i = 1, Nint - key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) - key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) + key_i_core(i,1) = xor(key_i(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(key_i(i,2), core_bitmask(i,2)) enddo - call bitstring_to_list_ab(key_i_core,occ,Ne,Nint) + call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) else - call bitstring_to_list_ab(key_i,occ,Ne,Nint) + call bitstring_to_list_ab(key_i, occ, Ne, Nint) endif hthree = 0.d0 diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index d1c1d45d..002f870e 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -47,13 +47,19 @@ subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) END_DOC use bitmasks - integer, intent(in) :: Nint - integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) - double precision, intent(out) :: htot - double precision :: hmono, htwoe, hthree - call htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) + double precision, intent(out) :: htot + double precision :: hmono, htwoe, hthree + + call htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + end + +! --- + subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + BEGIN_DOC ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -81,11 +87,11 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return - if(degree == 0)then + if(degree == 0) then call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1)then - call single_htilde_mu_mat_fock_bi_ortho(Nint,key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2)then + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) endif diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 5c156a4d..d95c87b1 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -111,7 +111,7 @@ end ! --- -subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) +subroutine ac_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) BEGIN_DOC ! Routine that computes one- and two-body energy corresponding @@ -127,17 +127,17 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) use bitmasks implicit none - integer, intent(in) :: iorb, ispin, Nint - integer, intent(inout) :: na, nb + integer, intent(in) :: iorb, ispin, Nint + integer, intent(inout) :: na, nb integer(bit_kind), intent(inout) :: key(Nint,2) - double precision, intent(inout) :: hmono,htwoe,hthree + double precision, intent(inout) :: hmono, htwoe, hthree - integer :: occ(Nint*bit_kind_size,2) - integer :: other_spin - integer :: k,l,i,jj,mm,j,m - double precision :: direct_int, exchange_int + integer :: occ(Nint*bit_kind_size,2) + integer :: other_spin + integer :: k, l, i, jj, mm, j, m + integer :: tmp(2) + double precision :: direct_int, exchange_int - if (iorb < 1) then print *, irp_here, ': iorb < 1' print *, iorb, mo_num @@ -153,7 +153,6 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) ASSERT (ispin < 3) ASSERT (Nint > 0) - integer :: tmp(2) !DIR$ FORCEINLINE call bitstring_to_list_ab(key, occ, tmp, Nint) ASSERT (tmp(1) == elec_alpha_num) @@ -169,50 +168,54 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) hmono = hmono + mo_bi_ortho_tc_one_e(iorb,iorb) ! Same spin - do i=1,na + do i = 1, na htwoe = htwoe + mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb) enddo ! Opposite spin - do i=1,nb + do i = 1, nb htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then - !!!!! 3-e part - !! same-spin/same-spin - do j = 1, na - jj = occ(j,ispin) - do m = j+1, na - mm = occ(m,ispin) - hthree += three_e_diag_parrallel_spin_prov(mm,jj,iorb) + if(three_body_h_tc .and. (elec_num.gt.2) .and. three_e_3_idx_term) then + + !!!!! 3-e part + !! same-spin/same-spin + do j = 1, na + jj = occ(j,ispin) + do m = j+1, na + mm = occ(m,ispin) + hthree += three_e_diag_parrallel_spin_prov(mm,jj,iorb) + enddo enddo - enddo - !! same-spin/oposite-spin - do j = 1, na - jj = occ(j,ispin) - do m = 1, nb - mm = occ(m,other_spin) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - hthree += direct_int - exchange_int + !! same-spin/oposite-spin + do j = 1, na + jj = occ(j,ispin) + do m = 1, nb + mm = occ(m,other_spin) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + hthree += direct_int - exchange_int + enddo enddo - enddo - !! oposite-spin/opposite-spin + !! oposite-spin/opposite-spin do j = 1, nb - jj = occ(j,other_spin) - do m = j+1, nb - mm = occ(m,other_spin) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - hthree += direct_int - exchange_int - enddo + jj = occ(j,other_spin) + do m = j+1, nb + mm = occ(m,other_spin) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + hthree += direct_int - exchange_int + enddo enddo endif na = na+1 + end +! --- + subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) use bitmasks implicit none diff --git a/src/tc_bi_ortho/slater_tc_slow.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f index 083d4ba3..d78540e7 100644 --- a/src/tc_bi_ortho/slater_tc_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -21,7 +21,7 @@ subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) integer :: degree call get_excitation_degree(key_j, key_i, degree, Nint) - if(degree.gt.2)then + if(degree.gt.2) then htot = 0.d0 else call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) @@ -60,22 +60,22 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return - if(degree == 0)then + if(degree == 0) then call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) - else if (degree == 1)then + else if (degree == 1) then call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) - else if(degree == 2)then + else if(degree == 2) then call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + if((.not.double_normal_ord) .and. (elec_num .gt. 2) .and. three_e_5_idx_term) then call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + else if((degree == 1) .and. (elec_num .gt. 2) .and. three_e_4_idx_term) then call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) - else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + else if((degree == 0) .and. (elec_num .gt. 2) .and. three_e_3_idx_term) then call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index f69684c0..2887c7be 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -8,11 +8,13 @@ program tc_bi_ortho END_DOC my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid print*, ' nb of states = ', N_states print*, ' nb of det = ', N_det @@ -20,22 +22,29 @@ program tc_bi_ortho call routine_diag() call write_tc_energy() call save_tc_bi_ortho_wavefunction() -end - -subroutine test - implicit none - integer :: i,j - double precision :: hmono,htwoe,hthree,htot - use bitmasks - print*,'reading the wave function ' - do i = 1, N_det - call debug_det(psi_det(1,1,i),N_int) - print*,i,psi_l_coef_bi_ortho(i,1)*psi_r_coef_bi_ortho(i,1) - print*,i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) - enddo end +! --- + +subroutine test() + + use bitmasks + implicit none + integer :: i, j + double precision :: hmono, htwoe, hthree, htot + + print*, 'reading the wave function ' + do i = 1, N_det + call debug_det(psi_det(1,1,i), N_int) + print*, i, psi_l_coef_bi_ortho(i,1)*psi_r_coef_bi_ortho(i,1) + print*, i, psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) + enddo + +end + +! --- + subroutine routine_diag() implicit none diff --git a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f index 28f122ee..9168fb3d 100644 --- a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f @@ -1,19 +1,32 @@ + +! --- + program tc_bi_ortho_prop - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid -! call routine_diag - call test + + !call routine_diag + call test + end +! --- + subroutine test implicit none integer :: i diff --git a/src/tc_bi_ortho/tc_cisd_sc2.irp.f b/src/tc_bi_ortho/tc_cisd_sc2.irp.f index 0fb9f524..d4c8c55d 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2.irp.f @@ -1,20 +1,32 @@ -program tc_bi_ortho - implicit none + +! --- + +program tc_cisd_sc2 + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call test + end -subroutine test +! --- + +subroutine test() implicit none ! double precision, allocatable :: dressing_dets(:),e_corr_dets(:) ! allocate(dressing_dets(N_det),e_corr_dets(N_det)) diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index fa946d6a..f027c38f 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -1,42 +1,56 @@ + +! --- + use bitmasks - BEGIN_PROVIDER [ integer, index_HF_psi_det] - implicit none - integer :: i,degree - do i = 1, N_det - call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - if(degree == 0)then - index_HF_psi_det = i - exit - endif - enddo - END_PROVIDER +! --- + +BEGIN_PROVIDER [integer, index_HF_psi_det] -subroutine diagonalize_CI_tc implicit none + integer :: i, degree + + do i = 1, N_det + call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int) + if(degree == 0) then + index_HF_psi_det = i + exit + endif + enddo + +END_PROVIDER + +! --- + +subroutine diagonalize_CI_tc() + BEGIN_DOC -! Replace the coefficients of the |CI| states by the coefficients of the -! eigenstates of the |CI| matrix. + ! Replace the coefficients of the |CI| states by the coefficients of the + ! eigenstates of the |CI| matrix. END_DOC - integer :: i,j - do j=1,N_states - do i=1,N_det + + implicit none + integer :: i, j + + do j = 1, N_states + do i = 1, N_det psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j) psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho + end +! --- - - BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states)] -&BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] -&BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] -&BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] -&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] -&BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] + BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states) ] +&BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth , (N_states) ] +&BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth , (N_det,N_states)] +&BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth , (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth , (N_states) ] +&BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC ! eigenvalues, right and left eigenvectors of the transcorrelated Hamiltonian on the BI-ORTHO basis @@ -44,29 +58,29 @@ end implicit none integer :: i, idx_dress, j, istate, k + integer :: i_good_state, i_other_state, i_state + integer :: n_real_tc_bi_orth_eigval_right, igood_r, igood_l logical :: converged, dagger - integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l - double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, parameter :: alpha = 0.1d0 + integer, allocatable :: index_good_state_array(:) + integer, allocatable :: iorder(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:), leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) - double precision, parameter :: alpha = 0.1d0 - integer :: i_good_state,i_other_state, i_state - integer, allocatable :: index_good_state_array(:) - logical, allocatable :: good_state_array(:) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - double precision, allocatable :: Stmp(:,:) - integer, allocatable :: iorder(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + double precision, allocatable :: Stmp(:,:) PROVIDE N_det N_int - if(n_det .le. N_det_max_full) then + if(N_det .le. N_det_max_full) then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) - allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det), leigvec_tc_bi_orth_tmp(N_det,N_det), eigval_right_tmp(N_det), expect_e(N_det)) + allocate(H_prime(N_det,N_det), s2_values_tmp(N_det)) H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) if(s2_eig) then H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) - do j=1,N_det + do j = 1, N_det H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 enddo endif diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index ec072531..4c21b5bd 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -31,7 +31,9 @@ END_PROVIDER - BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho_tranp, (N_det,N_det)] +! --- + +BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho_tranp, (N_det,N_det)] implicit none integer ::i,j do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index a7e4d09e..427508d2 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -12,10 +12,9 @@ program tc_som print *, ' do not forget to do tc-scf first' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf diff --git a/src/tc_bi_ortho/test_natorb.irp.f b/src/tc_bi_ortho/test_natorb.irp.f index 54c9a827..5b8801f7 100644 --- a/src/tc_bi_ortho/test_natorb.irp.f +++ b/src/tc_bi_ortho/test_natorb.irp.f @@ -1,21 +1,34 @@ + +! --- + program test_natorb - implicit none + BEGIN_DOC -! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine -! call test + + call routine() + ! call test() end -subroutine routine +! --- + +subroutine routine() + implicit none double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:) allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num)) diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index cb0c355c..e805eecb 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -1,19 +1,32 @@ + +! --- + program test_normal_order - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call provide_all_three_ints_bi_ortho - call test + + call provide_all_three_ints_bi_ortho() + call test() + end +! --- + subroutine test implicit none use bitmasks ! you need to include the bitmasks_module.f90 features diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 1f7bdfda..b398507a 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -1,14 +1,22 @@ + +! --- + program test_tc - implicit none - read_wf = .True. - my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 - read_wf = .True. - touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine_test_s2 - call routine_test_s2_davidson + + implicit none + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + call routine_test_s2 + call routine_test_s2_davidson + end subroutine routine_test_s2 diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 902f7295..b6beb65b 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -1,15 +1,24 @@ + +! --- + program tc_bi_ortho - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid ! call test_h_u0 ! call test_slater_tc_opt diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index b7de067f..182c03d7 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -1,22 +1,32 @@ + +! --- + program test_tc_fock - implicit none + BEGIN_DOC -! TODO : Put the documentation of the program here + ! TODO : Put the documentation of the program here END_DOC + + implicit none + print *, 'Hello world' + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid !call routine_1 !call routine_2 ! call routine_3() ! call test_3e - call routine_tot + call routine_tot + end ! --- diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index ea1503c3..29c238e1 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -262,3 +262,16 @@ doc: If |true|, use Manu IPP interface: ezfio,provider,ocaml default: True +[tc_grid1_a] +type: integer +doc: size of angular grid over r1 +interface: ezfio,provider,ocaml +default: 50 + +[tc_grid1_r] +type: integer +doc: size of radial grid over r1 +interface: ezfio,provider,ocaml +default: 30 + + diff --git a/src/tc_scf/combine_lr_tcscf.irp.f b/src/tc_scf/combine_lr_tcscf.irp.f index b257f4a5..a22614ba 100644 --- a/src/tc_scf/combine_lr_tcscf.irp.f +++ b/src/tc_scf/combine_lr_tcscf.irp.f @@ -10,8 +10,9 @@ program combine_lr_tcscf implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid bi_ortho = .True. diff --git a/src/tc_scf/minimize_tc_angles.irp.f b/src/tc_scf/minimize_tc_angles.irp.f index 5d3ff7f0..c7752930 100644 --- a/src/tc_scf/minimize_tc_angles.irp.f +++ b/src/tc_scf/minimize_tc_angles.irp.f @@ -1,17 +1,26 @@ -program print_angles - implicit none - BEGIN_DOC -! program that minimizes the angle between left- and right-orbitals when degeneracies are found in the TC-Fock matrix - END_DOC + +! --- + +program minimize_tc_angles + + BEGIN_DOC + ! program that minimizes the angle between left- and right-orbitals when degeneracies are found in the TC-Fock matrix + END_DOC + + implicit none + my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_n_pt_r_grid my_n_pt_a_grid -! call sort_by_tc_fock + + ! call sort_by_tc_fock ! TODO ! check if rotations of orbitals affect the TC energy ! and refuse the step call minimize_tc_orb_angles + end diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index e12fcd1c..b86009ee 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -11,10 +11,9 @@ program molden_lr_mos print *, 'starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid !call molden_lr diff --git a/src/tc_scf/print_fit_param.irp.f b/src/tc_scf/print_fit_param.irp.f index f8bcfa7f..e62f0dde 100644 --- a/src/tc_scf/print_fit_param.irp.f +++ b/src/tc_scf/print_fit_param.irp.f @@ -7,10 +7,9 @@ program print_fit_param implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid !call create_guess diff --git a/src/tc_scf/print_tcscf_energy.irp.f b/src/tc_scf/print_tcscf_energy.irp.f index 96512cb7..05b8df23 100644 --- a/src/tc_scf/print_tcscf_energy.irp.f +++ b/src/tc_scf/print_tcscf_energy.irp.f @@ -8,16 +8,9 @@ program print_tcscf_energy print *, 'Hello world' my_grid_becke = .True. - - !my_n_pt_r_grid = 30 - !my_n_pt_a_grid = 50 - - my_n_pt_r_grid = 100 - my_n_pt_a_grid = 170 - - !my_n_pt_r_grid = 100 - !my_n_pt_a_grid = 266 - + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call main() diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index 2567faf0..0f2663e5 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -10,8 +10,9 @@ program rotate_tcscf_orbitals implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid bi_ortho = .True. diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f index d3722098..2e9c67e2 100644 --- a/src/tc_scf/tc_petermann_factor.irp.f +++ b/src/tc_scf/tc_petermann_factor.irp.f @@ -10,10 +10,9 @@ program tc_petermann_factor implicit none my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid call main() diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 2f2d803f..e4c38741 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -14,14 +14,10 @@ program tc_scf my_grid_becke = .True. - !my_n_pt_r_grid = 30 - !my_n_pt_a_grid = 50 + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a - my_n_pt_r_grid = 100 - my_n_pt_a_grid = 170 - -! my_n_pt_r_grid = 10 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index b9287d58..649d0f3e 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -9,10 +9,9 @@ program test_ints print *, ' starting test_ints ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 -! my_n_pt_r_grid = 15 ! small grid for quick debug -! my_n_pt_a_grid = 26 ! small grid for quick debug + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid my_extra_grid_becke = .True. @@ -280,7 +279,7 @@ subroutine routine_v_ij_u_cst_mu_j1b_test do i = 1, ao_num do j = 1, ao_num array(j,i,l,k) += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight enddo enddo enddo @@ -506,7 +505,7 @@ subroutine routine_v_ij_u_cst_mu_j1b do i = 1, ao_num do j = 1, ao_num array(j,i,l,k) += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight - array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b_fit (j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight enddo enddo enddo From f6d8f326c7453d272ca83d9864e4cec498a768be Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 3 Jul 2023 00:47:55 +0200 Subject: [PATCH 199/337] cycle tc-integ when beta=0 --- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 4 ++++ src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f | 11 +++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 7c68de75..fda2db82 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -128,6 +128,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n do i_1s = 2, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b3_expo (i_1s) B_center(1) = List_all_comb_b3_cent(1,i_1s) B_center(2) = List_all_comb_b3_cent(2,i_1s) @@ -222,6 +223,7 @@ BEGIN_PROVIDER [double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_ do i_1s = 2, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b3_expo (i_1s) B_center(1) = List_all_comb_b3_cent(1,i_1s) B_center(2) = List_all_comb_b3_cent(2,i_1s) @@ -322,6 +324,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_poin do i_1s = 2, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b3_expo (i_1s) B_center(1) = List_all_comb_b3_cent(1,i_1s) B_center(2) = List_all_comb_b3_cent(2,i_1s) @@ -436,6 +439,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points do i_1s = 2, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b3_expo (i_1s) B_center(1) = List_all_comb_b3_cent(1,i_1s) B_center(2) = List_all_comb_b3_cent(2,i_1s) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 18d71134..9af3f9a9 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -60,6 +60,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b2_expo (i_1s) B_center(1) = List_all_comb_b2_cent(1,i_1s) B_center(2) = List_all_comb_b2_cent(2,i_1s) @@ -154,6 +155,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b2_expo (i_1s) B_center(1) = List_all_comb_b2_cent(1,i_1s) B_center(2) = List_all_comb_b2_cent(2,i_1s) @@ -243,10 +245,6 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_fit, (ao_num, ao_num, n_poi expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) -! do i_fit = ng_fit_jast, ng_fit_jast -! expo_fit = 5.0d0 -! coef_fit = 1.0d0 - ! --- coef = List_all_comb_b2_coef (1) @@ -264,6 +262,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_fit, (ao_num, ao_num, n_poi do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b2_expo (i_1s) B_center(1) = List_all_comb_b2_cent(1,i_1s) B_center(2) = List_all_comb_b2_cent(2,i_1s) @@ -306,6 +305,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin ! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) ! + ! TODO + ! one subroutine for all integrals + ! END_DOC include 'constants.include.F' @@ -383,6 +385,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 beta = List_all_comb_b2_expo (i_1s) B_center(1) = List_all_comb_b2_cent(1,i_1s) B_center(2) = List_all_comb_b2_cent(2,i_1s) From d911f4eee8b1569d6e34dc4ec4081031f400bcc1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 17:41:34 +0200 Subject: [PATCH 200/337] Rewrote Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 197 ++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 4 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 77eb6ddc..2d2a40ab 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,10 +1,28 @@ -BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] +BEGIN_PROVIDER [ integer, mini_basis_size, (128) ] + implicit none + BEGIN_DOC + ! Size of the minimal basis set per element + END_DOC + + mini_basis_size(1:2) = 1 + mini_basis_size(3:4) = 2 + mini_basis_size(5:10) = 5 + mini_basis_size(11:12) = 6 + mini_basis_size(13:18) = 9 + mini_basis_size(19:20) = 13 + mini_basis_size(21:36) = 18 + mini_basis_size(37:38) = 22 + mini_basis_size(39:54) = 27 + mini_basis_size(55:) = 36 +END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] implicit none BEGIN_DOC ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num + cholesky_ao_num_guess = ao_num*ao_num !sum(mini_basis_size(int(nucl_charge(:)))) END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -103,8 +121,10 @@ END_PROVIDER ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' +! call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + + call direct_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' ! Remove mmap double precision, external :: getUnitAndOpen @@ -131,3 +151,172 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, enddo END_PROVIDER + +subroutine direct_cholesky( A, rank, tau, ndim, L) + implicit none + integer :: ndim + integer, intent(inout) :: rank + double precision, intent(inout) :: A(ndim, ndim) + double precision, intent(out) :: L(ndim, rank) + double precision, intent(in) :: tau + + double precision, parameter :: s = 1.d-2 + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:) + integer, allocatable :: Lset(:), Dset(:) + + integer :: i,j,k,m,p,q, qj, dj + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + + L = 0.d0 + + ! 1. + do i=1,ndim + D(i) = A(i,i) + enddo + Dmax = maxval(D) +! print *, '# 1. ', D +! print *, '# 1. ', Dmax + + ! 2. + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo +! print *, '# 2. ', Lset(:np) + + ! 3. + N = 0 +! print *, '# 3. ', N + + ! 4. + i = 0 +! print *, '# 4. ', i + + ! 5. + do while (Dmax > tau) + ! a. + i = i+1 +! print *, '# 5.a ', i + + ! b. + Dmin = max(s*Dmax, tau) +! print *, '# 5.b ', Dmin + + ! c. + nq=0 + do q=1,np + if ( D(Lset(q)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(q) + endif + enddo +! print *, '# 5.c ', Dset(:nq) + + ! d. + allocate(Delta(np,nq)) + do m=1,nq + do k=1,np + Delta(k,m) = A(Lset(k), Dset(m)) + enddo + enddo +! print *, '# 5.d ', Delta + + ! e. + do m=1,nq + do k=1,np + do p=1,N + Delta(k,m) = Delta(k,m) - L(Lset(k),p) * L(Dset(m),p) + enddo + enddo + enddo +! print *, '# 5.e ', Delta + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo +! print *, '# 5.f ', Qmax + + ! g. + j = 0 +! print *, '# 5.g ', j + + do while ( (j <= nq).and.(Qmax > Dmin) ) + ! i. + j = j+1 + rank = N+j +! print *, '# 5.h.i ', j, rank + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo +! print *, ' # 5.h.ii ', qj, dj + + ! iii. + f = 1.d0/dsqrt(Qmax) + do p=1,np + L(Lset(p), rank) = Delta(p,dj) * f + enddo +! print *, ' # 5.h.iii ' +! do k=1,20 +! print *, L(k,1:rank) +! enddo + + ! iv. + do m=1, nq + do k=1, np + Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * L(Dset(m),rank) + enddo + enddo + + do k=1, np + D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + enddo + + Qmax = D(Dset(1)) + do q=1,np + Qmax = max(Qmax, D(Lset(q))) + enddo +! print *, '# 5.h.iv ', Delta +! print *, '# 5.h.iv ', D +! print *, '# 5.h.iv ', Qmax + + enddo + + deallocate(Delta) + + ! i. + N = N+j +! print *, '# 5.i ', N + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo +! print *, '# 5.j ', Dmax + + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo +! print *, '# k. ', Lset(:np) + enddo + +end From 487e85c6aef05219eb9e716eae429b5a126600c6 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 18:19:31 +0200 Subject: [PATCH 201/337] Cholesky OK --- src/ao_two_e_ints/cholesky.irp.f | 188 ++++++++----------------------- 1 file changed, 48 insertions(+), 140 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 2d2a40ab..6a78e9ff 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -34,106 +34,11 @@ END_PROVIDER ! = (ik|jl) = sum_a (ik|a).(a|jl) END_DOC - type(c_ptr) :: ptr - integer :: fd, i,j,k,l,m,rank - double precision, pointer :: ao_integrals(:,:,:,:) - double precision, external :: ao_two_e_integral - - ! Store AO integrals in a memory mapped file - call mmap(trim(ezfio_work_dir)//'ao_integrals', & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, .False., ptr) - call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - - print*, 'Providing the AO integrals (Cholesky)' - call wall_time(wall_1) - call cpu_time(cpu_1) - - ao_integrals = 0.d0 - - double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 - logical, external :: ao_two_e_integral_zero - double precision, external :: get_ao_two_e_integral - - if (read_ao_two_e_integrals) then - PROVIDE ao_two_e_integrals_in_map - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,ao_num - do k=1,ao_num - do i=1,ao_num - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) - ao_integrals(i,k,j,l) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - else - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - - endif - - ! Call Lapack cholesky_ao_num = cholesky_ao_num_guess -! call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - call direct_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) + call direct_cholesky(cholesky_ao, ao_num*ao_num, cholesky_ao_num, ao_cholesky_threshold) print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - ! Remove mmap - double precision, external :: getUnitAndOpen - call munmap( & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, ptr) - open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') - close(99, status='delete') - END_PROVIDER BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] @@ -152,35 +57,53 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -subroutine direct_cholesky( A, rank, tau, ndim, L) +subroutine direct_cholesky(L, ndim, rank, tau) implicit none + BEGIN_DOC +! Cholesky-decomposed AOs. +! +! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf : +! Page 32, section 13.5 + END_DOC integer :: ndim - integer, intent(inout) :: rank - double precision, intent(inout) :: A(ndim, ndim) - double precision, intent(out) :: L(ndim, rank) + integer, intent(out) :: rank + double precision, intent(out) :: L(ndim, ndim) double precision, intent(in) :: tau double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:) - integer, allocatable :: Lset(:), Dset(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) integer :: i,j,k,m,p,q, qj, dj integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f - allocate( D(ndim), Lset(ndim), Dset(ndim) ) + double precision, external :: get_ao_two_e_integral - L = 0.d0 + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + allocate( addr(2,ndim) ) ! 1. - do i=1,ndim - D(i) = A(i,i) + k=0 + do i=1,ao_num + do j=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + enddo enddo + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + Dmax = maxval(D) -! print *, '# 1. ', D -! print *, '# 1. ', Dmax ! 2. np=0 @@ -190,25 +113,20 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Lset(np) = p endif enddo -! print *, '# 2. ', Lset(:np) ! 3. N = 0 -! print *, '# 3. ', N - ! 4. + ! 4. i = 0 -! print *, '# 4. ', i - ! 5. + ! 5. do while (Dmax > tau) ! a. i = i+1 -! print *, '# 5.a ', i ! b. Dmin = max(s*Dmax, tau) -! print *, '# 5.b ', Dmin ! c. nq=0 @@ -218,43 +136,42 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Dset(nq) = Lset(q) endif enddo -! print *, '# 5.c ', Dset(:nq) - ! d. + ! d., e. allocate(Delta(np,nq)) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) do m=1,nq do k=1,np - Delta(k,m) = A(Lset(k), Dset(m)) + Delta(k,m) = get_ao_two_e_integral( & + addr(1,Lset(k)), & + addr(1,Dset(m)), & + addr(2,Lset(k)), & + addr(2,Dset(m)), & + ao_integrals_map) enddo - enddo -! print *, '# 5.d ', Delta - ! e. - do m=1,nq - do k=1,np - do p=1,N - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * L(Dset(m),p) + do p=1,N + f = L(Dset(m),p) + do k=1,np + Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f enddo enddo enddo -! print *, '# 5.e ', Delta + !$OMP END PARALLEL DO ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo -! print *, '# 5.f ', Qmax ! g. j = 0 -! print *, '# 5.g ', j do while ( (j <= nq).and.(Qmax > Dmin) ) ! i. j = j+1 rank = N+j -! print *, '# 5.h.i ', j, rank ! ii. do dj=1,nq @@ -263,22 +180,18 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) exit endif enddo -! print *, ' # 5.h.ii ', qj, dj ! iii. f = 1.d0/dsqrt(Qmax) do p=1,np L(Lset(p), rank) = Delta(p,dj) * f enddo -! print *, ' # 5.h.iii ' -! do k=1,20 -! print *, L(k,1:rank) -! enddo ! iv. do m=1, nq + f = L(Dset(m),rank) do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * L(Dset(m),rank) + Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f enddo enddo @@ -290,9 +203,6 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) do q=1,np Qmax = max(Qmax, D(Lset(q))) enddo -! print *, '# 5.h.iv ', Delta -! print *, '# 5.h.iv ', D -! print *, '# 5.h.iv ', Qmax enddo @@ -300,14 +210,12 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) ! i. N = N+j -! print *, '# 5.i ', N ! j. Dmax = D(Lset(1)) do p=1,np Dmax = max(Dmax, D(Lset(p))) enddo -! print *, '# 5.j ', Dmax np=0 do p=1,ndim @@ -316,7 +224,7 @@ subroutine direct_cholesky( A, rank, tau, ndim, L) Lset(np) = p endif enddo -! print *, '# k. ', Lset(:np) + enddo end From 3c7a10934f51aea2b97ea3196fae6442b1c0030a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 19:54:00 +0200 Subject: [PATCH 202/337] Accelerated Cholesky --- external/ezfio | 2 +- external/irpf90 | 2 +- external/qp2-dependencies | 2 +- src/ao_two_e_ints/cholesky.irp.f | 42 ++++++++++++++++++++------------ 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/external/qp2-dependencies b/external/qp2-dependencies index e0d0e02e..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 6a78e9ff..dc5040be 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -73,7 +73,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 - double precision, allocatable :: D(:), Delta(:,:) + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:) integer :: i,j,k,m,p,q, qj, dj @@ -138,7 +138,16 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - allocate(Delta(np,nq)) + allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) do m=1,nq do k=1,np @@ -149,17 +158,13 @@ subroutine direct_cholesky(L, ndim, rank, tau) addr(2,Dset(m)), & ao_integrals_map) enddo - - do p=1,N - f = L(Dset(m),p) - do k=1,np - Delta(k,m) = Delta(k,m) - L(Lset(k),p) * f - enddo - enddo enddo !$OMP END PARALLEL DO - ! f. + call dgemm('N','T',np,nq,N,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) @@ -184,19 +189,26 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! iii. f = 1.d0/dsqrt(Qmax) do p=1,np - L(Lset(p), rank) = Delta(p,dj) * f + Ltmp_p(p,1) = Delta(p,dj) * f + L(Lset(p), rank) = Ltmp_p(p,1) + enddo + + do q=1,nq + Ltmp_q(q,1) = L(Dset(q), rank) enddo ! iv. +! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) + !$OMP PARALLEL DO PRIVATE(f,m,k) do m=1, nq - f = L(Dset(m),rank) do k=1, np - Delta(k,m) = Delta(k,m) - L(Lset(k),rank) * f + Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo + !$OMP END PARALLEL DO do k=1, np - D(Lset(k)) = D(Lset(k)) - L(Lset(k),rank) * L(Lset(k),rank) + D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) enddo Qmax = D(Dset(1)) @@ -206,7 +218,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo - deallocate(Delta) + deallocate(Delta, Ltmp_p, Ltmp_q) ! i. N = N+j From 837ec89f1baf8cef63a045a63394edce15f2883d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 3 Jul 2023 21:04:50 +0200 Subject: [PATCH 203/337] Accelerate Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index dc5040be..27aa1aa6 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -81,6 +81,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + print *, 'Entering Cholesky' allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(2,ndim) ) @@ -139,6 +142,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! d., e. allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) + + !$OMP DO do k=1,N do p=1,np Ltmp_p(p,k) = L(Lset(p),k) @@ -147,10 +153,24 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,k) = L(Dset(q),k) enddo enddo + !$OMP END DO NOWAIT - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(m,k) + !$OMP DO do m=1,nq do k=1,np + Delta(k,m) = 0.d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do m=1,nq + do k=1,np + if (ao_two_e_integral_zero( & + addr(1,Lset(k)), & + addr(1,Dset(m)), & + addr(2,Lset(k)), & + addr(2,Dset(m)) ) ) cycle Delta(k,m) = get_ao_two_e_integral( & addr(1,Lset(k)), & addr(1,Dset(m)), & @@ -159,7 +179,9 @@ subroutine direct_cholesky(L, ndim, rank, tau) ao_integrals_map) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO + + !$OMP END PARALLEL call dgemm('N','T',np,nq,N,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) @@ -188,28 +210,36 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! iii. f = 1.d0/dsqrt(Qmax) + !$OMP PARALLEL PRIVATE(m,k) + !$OMP DO do p=1,np Ltmp_p(p,1) = Delta(p,dj) * f L(Lset(p), rank) = Ltmp_p(p,1) enddo + !$OMP END DO + !$OMP DO do q=1,nq Ltmp_q(q,1) = L(Dset(q), rank) enddo + !$OMP END DO ! iv. -! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) - !$OMP PARALLEL DO PRIVATE(f,m,k) + !$OMP DO do m=1, nq do k=1, np Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo - !$OMP END PARALLEL DO + !$OMP END DO NOWAIT +! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) + !$OMP DO do k=1, np D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) enddo + !$OMP END DO + !$OMP END PARALLEL Qmax = D(Dset(1)) do q=1,np From 06720f3f210bc548346f9194a8d89761aa228f35 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:22:12 +0200 Subject: [PATCH 204/337] integer8 in cholesky --- external/qp2-dependencies | 2 +- src/ao_two_e_ints/cholesky.irp.f | 111 +++++++++++++++++++------------ src/utils/fast_mkl.c | 5 ++ 3 files changed, 73 insertions(+), 45 deletions(-) create mode 100644 src/utils/fast_mkl.c diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..e0d0e02e 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit e0d0e02e9f5ece138d1520106954a881ab0b8db2 diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 27aa1aa6..01c79d12 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -74,27 +74,31 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:) + integer*8, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer*8, allocatable :: Lset_rev(:), Dset_rev(:) - integer :: i,j,k,m,p,q, qj, dj - integer :: N, np, nq + integer*8 :: i,j,k,m,p,q, qj, dj, p2, q2 + integer*8 :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero print *, 'Entering Cholesky' + rank = 0 - allocate( D(ndim), Lset(ndim), Dset(ndim) ) - allocate( addr(2,ndim) ) + allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim) ) + allocate( addr(3,ndim) ) ! 1. k=0 - do i=1,ao_num - do j=1,ao_num + do j=1,ao_num + do i=1,ao_num k = k+1 addr(1,k) = i addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j enddo enddo @@ -110,10 +114,12 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! 2. np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo @@ -133,15 +139,21 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! c. nq=0 - do q=1,np - if ( D(Lset(q)) > Dmin ) then + LDmap = 0 + DLmap = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then nq = nq+1 - Dset(nq) = Lset(q) + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p endif enddo ! d., e. allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) !$OMP DO @@ -153,38 +165,47 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,k) = L(Dset(q),k) enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO - !$OMP DO + !$OMP DO SCHEDULE(dynamic,8) do m=1,nq - do k=1,np - Delta(k,m) = 0.d0 + + do k=1, nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + Delta(p,m) = 0.d0 + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + Delta(q,m) = Delta(p,m) enddo - enddo - !$OMP END DO - !$OMP DO - do m=1,nq do k=1,np - if (ao_two_e_integral_zero( & - addr(1,Lset(k)), & - addr(1,Dset(m)), & - addr(2,Lset(k)), & - addr(2,Dset(m)) ) ) cycle - Delta(k,m) = get_ao_two_e_integral( & - addr(1,Lset(k)), & - addr(1,Dset(m)), & - addr(2,Lset(k)), & - addr(2,Dset(m)), & - ao_integrals_map) + ! Apply only to (k,m) pairs where k is not in Dset + if (LDmap(k) /= 0) cycle + q = Lset_rev(addr(3,Lset(k))) + if ((0 < q).and.(q < k)) cycle + if (ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + Delta(k,m) = 0.d0 + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + Delta(q,m) = Delta(k,m) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm('N','T',np,nq,N,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + call dgemm('N','T', int(np,4), int(nq,4), int(N,4), -1.d0, & + Ltmp_p, int(np,4), Ltmp_q, int(nq,4), 1.d0, Delta, int(np,4)) ! f. Qmax = D(Dset(1)) @@ -193,11 +214,11 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! g. - j = 0 - do while ( (j <= nq).and.(Qmax > Dmin) ) + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. - j = j+1 rank = N+j ! ii. @@ -208,13 +229,17 @@ subroutine direct_cholesky(L, ndim, rank, tau) endif enddo + L(:, rank) = 0.d0 + ! iii. f = 1.d0/dsqrt(Qmax) - !$OMP PARALLEL PRIVATE(m,k) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np Ltmp_p(p,1) = Delta(p,dj) * f L(Lset(p), rank) = Ltmp_p(p,1) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,1) * Ltmp_p(p,1) enddo !$OMP END DO @@ -223,22 +248,17 @@ subroutine direct_cholesky(L, ndim, rank, tau) Ltmp_q(q,1) = L(Dset(q), rank) enddo !$OMP END DO - + ! iv. - !$OMP DO + + !$OMP DO SCHEDULE(static) do m=1, nq do k=1, np Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) enddo enddo - !$OMP END DO NOWAIT -! call dger(np, nq, -1.d0, Ltmp_p, 1, Ltmp_q, 1, Delta, np) - - !$OMP DO - do k=1, np - D(Lset(k)) = D(Lset(k)) - Ltmp_p(k,1) * Ltmp_p(k,1) - enddo !$OMP END DO + !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -247,6 +267,7 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo enddo + print *, Qmax deallocate(Delta, Ltmp_p, Ltmp_q) @@ -260,10 +281,12 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo np=0 + Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p + Lset_rev(p) = np endif enddo diff --git a/src/utils/fast_mkl.c b/src/utils/fast_mkl.c new file mode 100644 index 00000000..aa1f82f1 --- /dev/null +++ b/src/utils/fast_mkl.c @@ -0,0 +1,5 @@ +int mkl_serv_intel_cpu_true() { + return 1; +} + + From 6a53e44e9bed0bf6aa40f24c1fc13a25889ef727 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:43:44 +0200 Subject: [PATCH 205/337] Fast MKL on AMD --- src/ezfio_files/NEED | 1 + src/ezfio_files/ezfio.irp.f | 7 ++++++- src/utils/c_functions.f90 | 7 ++++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ezfio_files/NEED b/src/ezfio_files/NEED index d06d604c..1766924f 100644 --- a/src/ezfio_files/NEED +++ b/src/ezfio_files/NEED @@ -1,2 +1,3 @@ mpi zmq +utils diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 4f53b173..e18b2378 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -7,6 +7,8 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] PROVIDE mpi_initialized + integer :: i + ! Get the QPACKAGE_INPUT environment variable call getenv('QPACKAGE_INPUT',ezfio_filename) if (ezfio_filename == '') then @@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] END_PROVIDER BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] + use c_functions implicit none BEGIN_DOC ! EZFIO/work/ END_DOC - call ezfio_set_work_empty(.False.) + logical :: b + b = mkl_serv_intel_cpu_true() /= 1 + call ezfio_set_work_empty(b) ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 65d4ad62..a9c8900b 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -57,6 +57,12 @@ module c_functions end subroutine sscanf_sd_c end interface + interface + integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C) + use iso_c_binding + end function + end interface + contains integer function atoi(a) @@ -131,4 +137,3 @@ subroutine usleep(us) call usleep_c(u) end subroutine usleep - From faf43331edb20391a10ec6cb85a354d471f1612c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 01:46:49 +0200 Subject: [PATCH 206/337] Fix segfault in CC --- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 31fe67ce..770d629a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -332,7 +332,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ print '(A)', ' +----------------------+--------------+----------+' print '(A)', '' - deallocate(X_vovv,X_ooov,T_voov,T_oovv) + deallocate(X_vovv) + deallocate(X_ooov) + deallocate(T_voov) + deallocate(T_oovv) end From 9b0c270662c35f856ae98f4832a13d39dca59c8c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 10:46:05 +0200 Subject: [PATCH 207/337] Block cholesky --- src/ao_two_e_ints/cholesky.irp.f | 46 +++++++++++++++++++-------- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 4 ++- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 01c79d12..f26a2729 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -22,7 +22,8 @@ END_PROVIDER ! Number of Cholesky vectors in AO basis END_DOC - cholesky_ao_num_guess = ao_num*ao_num !sum(mini_basis_size(int(nucl_charge(:)))) + cholesky_ao_num_guess = ao_num*ao_num + cholesky_ao_num_guess = 2* ao_num * sum(mini_basis_size(int(nucl_charge(:)))) END_PROVIDER BEGIN_PROVIDER [ integer, cholesky_ao_num ] @@ -84,6 +85,8 @@ subroutine direct_cholesky(L, ndim, rank, tau) double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero + integer :: block_size, iblock + print *, 'Entering Cholesky' rank = 0 @@ -152,7 +155,10 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - allocate(Delta(np,nq), Ltmp_p(max(np,1),max(N,1)), Ltmp_q(max(nq,1),max(N,1))) + block_size = max(N,32) + allocate(Delta(np,nq), & + Ltmp_p(max(np,1),block_size), & + Ltmp_q(max(nq,1),block_size) ) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) @@ -215,12 +221,19 @@ subroutine direct_cholesky(L, ndim, rank, tau) ! g. + iblock = 0 do j=1,nq if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. rank = N+j + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + ! ii. do dj=1,nq qj = Dset(dj) @@ -231,33 +244,40 @@ subroutine direct_cholesky(L, ndim, rank, tau) L(:, rank) = 0.d0 + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & + Ltmp_p(1,iblock), 1) + ! iii. f = 1.d0/dsqrt(Qmax) !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np - Ltmp_p(p,1) = Delta(p,dj) * f - L(Lset(p), rank) = Ltmp_p(p,1) - D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,1) * Ltmp_p(p,1) + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) enddo !$OMP END DO !$OMP DO do q=1,nq - Ltmp_q(q,1) = L(Dset(q), rank) + Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO ! iv. - !$OMP DO SCHEDULE(static) - do m=1, nq - do k=1, np - Delta(k,m) = Delta(k,m) - Ltmp_p(k,1) * Ltmp_q(m,1) - enddo - enddo - !$OMP END DO +! !$OMP DO SCHEDULE(static) +! do m=1, nq +! do k=1, np +! Delta(k,m) = Delta(k,m) - Ltmp_p(k,iblock) * Ltmp_q(m,iblock) +! enddo +! enddo +! !$OMP END DO !$OMP END PARALLEL diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 770d629a..dbbed19e 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo !$OMP END DO nowait + !$OMP BARRIER !$OMP END PARALLEL double precision, external :: ccsd_t_task_aba @@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call wall_time(t01) if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then - t00 = t01 !$OMP TASKWAIT + call wall_time(t01) + t00 = t01 double precision :: ET, ET2 double precision :: energy_stoch, energy_det From 0242e9c37634ec593c3dcd3dd37d5c4a18ec3b69 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 22:17:31 +0200 Subject: [PATCH 208/337] Changed formats E to ES --- config/ifort_2021_debug.cfg | 66 ++++++ src/ao_two_e_ints/cholesky.irp.f | 192 ++++++++++-------- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_spin_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 12 +- src/cipsi/pt2_stoch_routines.irp.f | 2 +- .../dav_diag_dressed_ext_rout.irp.f | 2 +- .../dav_double_dress_ext_rout.irp.f | 2 +- .../dav_dressed_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_general.irp.f | 2 +- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 2 +- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/determinants/dipole_moments.irp.f | 6 +- src/ezfio_files/ezfio.irp.f | 2 +- src/mo_optimization/first_gradient_opt.irp.f | 2 +- src/tc_bi_ortho/print_tc_dump.irp.f | 10 +- src/tc_scf/molden_lr_mos.irp.f | 14 +- src/tools/molden.irp.f | 4 +- src/tools/print_ci_vectors.irp.f | 2 +- src/utils/format_w_error.irp.f | 2 +- .../rotation_matrix_iterative.irp.f | 4 +- .../trust_region_optimal_lambda.irp.f | 6 +- 25 files changed, 214 insertions(+), 136 deletions(-) create mode 100644 config/ifort_2021_debug.cfg diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg new file mode 100644 index 00000000..d70b1465 --- /dev/null +++ b/config/ifort_2021_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f26a2729..18180efb 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,47 +1,3 @@ -BEGIN_PROVIDER [ integer, mini_basis_size, (128) ] - implicit none - BEGIN_DOC - ! Size of the minimal basis set per element - END_DOC - - mini_basis_size(1:2) = 1 - mini_basis_size(3:4) = 2 - mini_basis_size(5:10) = 5 - mini_basis_size(11:12) = 6 - mini_basis_size(13:18) = 9 - mini_basis_size(19:20) = 13 - mini_basis_size(21:36) = 18 - mini_basis_size(37:38) = 22 - mini_basis_size(39:54) = 27 - mini_basis_size(55:) = 36 -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] - implicit none - BEGIN_DOC - ! Number of Cholesky vectors in AO basis - END_DOC - - cholesky_ao_num_guess = ao_num*ao_num - cholesky_ao_num_guess = 2* ao_num * sum(mini_basis_size(int(nucl_charge(:)))) -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num ] -&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] - use mmap_module - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - END_DOC - - cholesky_ao_num = cholesky_ao_num_guess - - call direct_cholesky(cholesky_ao, ao_num*ao_num, cholesky_ao_num, ao_cholesky_threshold) - print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - -END_PROVIDER - BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -58,36 +14,55 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -subroutine direct_cholesky(L, ndim, rank, tau) +BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] implicit none BEGIN_DOC -! Cholesky-decomposed AOs. -! -! https://www.diva-portal.org/smash/get/diva2:396223/FULLTEXT01.pdf : -! Page 32, section 13.5 + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num END_DOC - integer :: ndim - integer, intent(out) :: rank - double precision, intent(out) :: L(ndim, ndim) - double precision, intent(in) :: tau + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + double precision, parameter :: s = 1.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer*8, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer*8, allocatable :: Lset_rev(:), Dset_rev(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer, allocatable :: Lset_rev(:), Dset_rev(:) - integer*8 :: i,j,k,m,p,q, qj, dj, p2, q2 - integer*8 :: N, np, nq + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero - integer :: block_size, iblock + integer :: block_size, iblock, ierr + + PROVIDE ao_two_e_integrals_in_map + deallocate(cholesky_ao) + + ndim = ao_num*ao_num + tau = ao_cholesky_threshold + + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + - print *, 'Entering Cholesky' rank = 0 allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) @@ -155,10 +130,40 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo ! d., e. - block_size = max(N,32) - allocate(Delta(np,nq), & - Ltmp_p(max(np,1),block_size), & - Ltmp_q(max(nq,1),block_size) ) + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + L(:,k) = L_old(:,k) + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) @@ -176,19 +181,18 @@ subroutine direct_cholesky(L, ndim, rank, tau) !$OMP DO SCHEDULE(dynamic,8) do m=1,nq + Delta(:,m) = 0.d0 do k=1, nq ! Apply only to (k,m) pairs both in Dset p = DLmap(k) q = Lset_rev(addr(3,Dset(k))) if ((0 < q).and.(q < p)) cycle - if (ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = 0.d0 - else Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + Delta(q,m) = Delta(p,m) endif - Delta(q,m) = Delta(p,m) enddo do k=1,np @@ -196,22 +200,22 @@ subroutine direct_cholesky(L, ndim, rank, tau) if (LDmap(k) /= 0) cycle q = Lset_rev(addr(3,Lset(k))) if ((0 < q).and.(q < k)) cycle - if (ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - Delta(k,m) = 0.d0 - else Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + Delta(q,m) = Delta(k,m) endif - Delta(q,m) = Delta(k,m) enddo enddo !$OMP END DO !$OMP END PARALLEL - call dgemm('N','T', int(np,4), int(nq,4), int(N,4), -1.d0, & - Ltmp_p, int(np,4), Ltmp_q, int(nq,4), 1.d0, Delta, int(np,4)) + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif ! f. Qmax = D(Dset(1)) @@ -242,14 +246,18 @@ subroutine direct_cholesky(L, ndim, rank, tau) endif enddo - L(:, rank) = 0.d0 + L(1:ndim, rank) = 0.d0 iblock = iblock+1 do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo - call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & Ltmp_p(1,iblock), 1) + endif ! iii. f = 1.d0/dsqrt(Qmax) @@ -269,27 +277,20 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo !$OMP END DO - ! iv. - -! !$OMP DO SCHEDULE(static) -! do m=1, nq -! do k=1, np -! Delta(k,m) = Delta(k,m) - Ltmp_p(k,iblock) * Ltmp_q(m,iblock) -! enddo -! enddo -! !$OMP END DO - !$OMP END PARALLEL Qmax = D(Dset(1)) - do q=1,np - Qmax = max(Qmax, D(Lset(q))) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) enddo enddo - print *, Qmax - deallocate(Delta, Ltmp_p, Ltmp_q) + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(Delta, stat=ierr) + deallocate(Ltmp_p, stat=ierr) + deallocate(Ltmp_q, stat=ierr) ! i. N = N+j @@ -312,4 +313,15 @@ subroutine direct_cholesky(L, ndim, rank, tau) enddo -end + allocate(cholesky_ao(ao_num,ao_num,rank)) + call dcopy(ndim*rank, L, 1, cholesky_ao, 1) + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + +END_PROVIDER + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 40c57188..d23073b8 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -112,7 +112,7 @@ subroutine run_ccsd_space_orb ! Energy call ccsd_energy_space(nO,nV,tau,t1,energy) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then @@ -132,7 +132,7 @@ subroutine run_ccsd_space_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index a267cc45..09d6a0fe 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) call wall_time(tfi) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then print*,'Total:',tfi-tbi,'s' @@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index dbbed19e..13fa4f1a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -210,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) print '(A)', '' - print '(A)', ' +----------------------+--------------+----------+' - print '(A)', ' | E(CCSD(T)) | Error | % |' - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ==========' + print '(A)', ' E(CCSD(T)) Error % ' + print '(A)', ' ======================= ============== ==========' call wall_time(t00) @@ -257,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (imin >= bounds(2,isample)) then cycle endif - ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1 if (sampled(ieta) == -1_8) then sampled(ieta) = 0_8 @@ -324,14 +324,14 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ========== ' print '(A)', '' deallocate(X_vovv) diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7909007a..3b048c14 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ time-time0 ! Old print - !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, & + !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, & ! pt2_data % pt2(pt2_stoch_istate) +E, & ! pt2_data_err % pt2(pt2_stoch_istate), & ! pt2_data % variance(pt2_stoch_istate), & diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 73608720..0dc939cb 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index e59d21d1..24f4fa10 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c045aa1a..cedaaf0a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 2621e3a9..deb7e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index cd9124e6..9940bf1e 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index 26853df9..b7179c18 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..fa8aff80 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 45258c1c..7b559925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 3ff060a6..96ca84ab 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, !don't print continue else - write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index 06fca0cd..e445c56b 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -66,9 +66,9 @@ END_PROVIDER write(*,'(i16)',advance='no') i end do write(*,*) '' - write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment - write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment - write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment + write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment + write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment + write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment !print*, 'x_dipole_moment = ',x_dipole_moment !print*, 'y_dipole_moment = ',y_dipole_moment !print*, 'z_dipole_moment = ',z_dipole_moment diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index e18b2378..7e414a04 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -5,7 +5,7 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] ! variable if it is set, or as the 1st argument of the command line. END_DOC - PROVIDE mpi_initialized + PROVIDE mpi_initialized output_wall_time_0 integer :: i diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization/first_gradient_opt.irp.f index d6918a00..f08b9d1f 100644 --- a/src/mo_optimization/first_gradient_opt.irp.f +++ b/src/mo_optimization/first_gradient_opt.irp.f @@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad) if (debug) then print*,'Matrix containing the gradient :' do i = 1, mo_num - write(*,'(100(E12.5))') A(i,1:mo_num) + write(*,'(100(ES12.5))') A(i,1:mo_num) enddo endif diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 868de444..37dfe051 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -62,7 +62,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo @@ -71,7 +71,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 enddo enddo @@ -128,7 +128,7 @@ subroutine ERI_dump() do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l) enddo enddo enddo @@ -167,8 +167,8 @@ subroutine LMat_tilde_dump() !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral ! TCHint convention if(dabs(integral).gt.1d-10) then - write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n - !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k endif enddo enddo diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index b86009ee..98c7b230 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -72,7 +72,7 @@ subroutine molden_lr write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -170,7 +170,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo write (i_unit_output,*) 'Sym= 1' @@ -178,7 +178,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -235,7 +235,7 @@ subroutine molden_l() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -333,7 +333,7 @@ subroutine molden_l() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -390,7 +390,7 @@ subroutine molden_r() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -488,7 +488,7 @@ subroutine molden_r() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index 830a141e..e5902a6f 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -44,7 +44,7 @@ program molden write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -142,7 +142,7 @@ program molden write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/print_ci_vectors.irp.f b/src/tools/print_ci_vectors.irp.f index 97dfdc0b..d5f86213 100644 --- a/src/tools/print_ci_vectors.irp.f +++ b/src/tools/print_ci_vectors.irp.f @@ -28,7 +28,7 @@ subroutine routine do i = 1, N_det print *, 'Determinant ', i call debug_det(psi_det(1,1,i),N_int) - print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states) + print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states) print *, '' print *, '' enddo diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 7f7458b6..c253456e 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err write(str_size,'(I3)') size_nb ! Error - write(str_exp,'(1pE20.0)') error + write(str_exp,'(ES20.0)') error str_error = trim(adjustl(str_exp)) ! Number of digit: Y (FX.Y) from the exponent diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f index f268df04..db3d5c99 100644 --- a/src/utils_trust_region/rotation_matrix_iterative.irp.f +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'R' !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) + ! write(*,'(10(ES12.5))') R(i,:) !enddo do i = 1, m @@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'RRT' !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) + ! write(*,'(10(ES12.5))') RRT(i,:) !enddo max_elem = 0d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index b7dcf875..e98bbfb7 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - !write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,ES12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then From 119779595aba655ce1effe2f7cb93ea26701c226 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 4 Jul 2023 23:43:26 +0200 Subject: [PATCH 209/337] Accelerate Cholesky CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 13 +---- src/utils_cc/mo_integrals_cc.irp.f | 91 ++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index d23073b8..3c9a2cfc 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1549,19 +1549,12 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, cc_list_vir,cc_list_vir,cc_list_vir,(/ gam /), B1) + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1562,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..2e7ecdd4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,86 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) !$OMP DO - do i4 = 1, n4 + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i4=1,n4 idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 + do i2=1,n2 idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do k=1,cholesky_ao_num + v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + v1, cholesky_ao_num, & + v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO + +! !$OMP PARALLEL & +! !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,cholesky_mo_transp,cholesky_ao_num,v1) & +! !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer,v2)& +! !$OMP DEFAULT(NONE) +! allocate(buffer(n1,n3,n2), v2(cholesky_ao_num,n2)) +! !$OMP DO +! do i4 = 1, n4 +! idx4 = list4(i4) +! do i2=1,n2 +! idx2 = list2(i2) +! do k=1,cholesky_ao_num +! v2(k,i2) = cholesky_mo_transp(k,idx2,idx4) +! enddo +! enddo +! call dgemm('T','N', n1*n3, n2, cholesky_ao_num, 1.d0, & +! v1, cholesky_ao_num, & +! v2, cholesky_ao_num, 0.d0, buffer, n1*n3) +! do i3 = 1, n3 +! do i2 = 1, n2 +! do i1 = 1, n1 +! v(i1,i2,i3,i4) = buffer(i1,i3,i2) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! deallocate(buffer, v2) +! !$OMP END PARALLEL +! deallocate(v1) else double precision :: get_two_e_integral @@ -112,6 +166,7 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& From 94b1ae138b999517a62aa5ae0bcb9ab7fb00db77 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 00:07:46 +0200 Subject: [PATCH 210/337] Cleaning --- src/ao_two_e_ints/cholesky.irp.f | 4 ++-- src/ccsd/ccsd_space_orb_sub.irp.f | 16 +++++++++++++++- src/utils_cc/mo_integrals_cc.irp.f | 30 ------------------------------ 3 files changed, 17 insertions(+), 33 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 18180efb..98652d8f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 1.d-2 + double precision, parameter :: s = 3.d-2 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -135,7 +135,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Delta(np,nq))' + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 3c9a2cfc..1d77180e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -1549,12 +1549,26 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) - call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, cc_list_vir,cc_list_vir,cc_list_vir,(/ gam /), B1) + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & + cc_list_vir,cc_list_vir,cc_list_vir,(/ cc_list_vir(gam) /), B1) + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) + +! !$omp do +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! enddo +! enddo +! enddo +! !$omp end do nowait + do i = 1, nO !$omp do do b = 1, nV diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 2e7ecdd4..2db614b4 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -99,36 +99,6 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) enddo !$OMP END PARALLEL DO -! !$OMP PARALLEL & -! !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,cholesky_mo_transp,cholesky_ao_num,v1) & -! !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer,v2)& -! !$OMP DEFAULT(NONE) -! allocate(buffer(n1,n3,n2), v2(cholesky_ao_num,n2)) -! !$OMP DO -! do i4 = 1, n4 -! idx4 = list4(i4) -! do i2=1,n2 -! idx2 = list2(i2) -! do k=1,cholesky_ao_num -! v2(k,i2) = cholesky_mo_transp(k,idx2,idx4) -! enddo -! enddo -! call dgemm('T','N', n1*n3, n2, cholesky_ao_num, 1.d0, & -! v1, cholesky_ao_num, & -! v2, cholesky_ao_num, 0.d0, buffer, n1*n3) -! do i3 = 1, n3 -! do i2 = 1, n2 -! do i1 = 1, n1 -! v(i1,i2,i3,i4) = buffer(i1,i3,i2) -! enddo -! enddo -! enddo -! enddo -! !$OMP END DO -! deallocate(buffer, v2) -! !$OMP END PARALLEL -! deallocate(v1) - else double precision :: get_two_e_integral From 0132eb87fe786f39ee4e9326844829229716c19d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 02:40:59 +0200 Subject: [PATCH 211/337] Symmetry in cholesky --- src/ao_two_e_ints/cholesky.irp.f | 64 ++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 98652d8f..f4746144 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 3.d-2 + double precision, parameter :: s = 1.d-1 double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -45,6 +45,8 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] integer :: block_size, iblock, ierr + integer(omp_lock_kind), allocatable :: lock(:) + PROVIDE ao_two_e_integrals_in_map deallocate(cholesky_ao) @@ -66,8 +68,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] rank = 0 allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) allocate( addr(3,ndim) ) + do k=1,ndim + call omp_init_lock(lock(k)) + enddo ! 1. k=0 @@ -113,7 +118,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] i = i+1 ! b. - Dmin = max(s*Dmax, tau) + Dmin = max(s*Dmax,tau) ! c. nq=0 @@ -165,7 +170,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] stop -1 endif - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q) + Delta(:,:) = 0.d0 + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) !$OMP DO do k=1,N @@ -181,20 +188,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] !$OMP DO SCHEDULE(dynamic,8) do m=1,nq - Delta(:,m) = 0.d0 - do k=1, nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - Delta(q,m) = Delta(p,m) - endif - enddo - + call omp_set_lock(lock(m)) do k=1,np ! Apply only to (k,m) pairs where k is not in Dset if (LDmap(k) /= 0) cycle @@ -204,9 +198,37 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] addr(2,Lset(k)), addr(2,Dset(m)) ) ) then Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - Delta(q,m) = Delta(k,m) + if (q /= 0) Delta(q,m) = Delta(k,m) endif enddo + + j = Dset_rev(addr(3,Dset(m))) + if ((0 < j).and.(j < m)) then + call omp_unset_lock(lock(m)) + cycle + endif + + if ((j /= m).and.(j /= 0)) then + call omp_set_lock(lock(j)) + endif + do k=1,nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + if (q /= 0) Delta(q,m) = Delta(p,m) + if (j /= 0) Delta(p,j) = Delta(p,m) + if (q*j /= 0) Delta(q,j) = Delta(p,m) + endif + enddo + call omp_unset_lock(lock(m)) + if ((j /= m).and.(j /= 0)) then + call omp_unset_lock(lock(j)) + endif enddo !$OMP END DO @@ -313,6 +335,10 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo + do k=1,ndim + call omp_destroy_lock(lock(k)) + enddo + allocate(cholesky_ao(ao_num,ao_num,rank)) call dcopy(ndim*rank, L, 1, cholesky_ao, 1) deallocate(L) From 9293f360d51d31248d2edcd9cffeed16d90924f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 09:09:12 +0200 Subject: [PATCH 212/337] RELEASE_NOTES.org --- RELEASE_NOTES.org | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 3bd02898..a0e6d104 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -10,7 +10,8 @@ - Added many types of integrals - Accelerated four-index transformation - Added transcorrelated SCF - - Added transcorrelated CIPSI + - Added bi-orthonormal transcorrelated CIPSI + - Added Cholesky decomposition of AO integrals - Added CCSD and CCSD(T) - Added MO localization - Changed coupling parameters for ROHF @@ -20,7 +21,7 @@ - Removed cryptokit dependency in OCaml - Using now standard convention in RDM - Added molecular properties - - [ ] Added GTOs with complex exponent + - Added GTOs with complex exponent *** TODO: take from dev - Updated version of f77-zmq From 41a369dd687fd498917c675930f169e736d766a0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 17:43:31 +0200 Subject: [PATCH 213/337] Optimized 4idx with Cholesky --- src/mo_two_e_ints/mo_bi_integrals.irp.f | 47 ++++++++++++++++--------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index a461504e..b15d9745 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -465,31 +465,34 @@ subroutine add_integrals_to_map_cholesky integer :: size_buffer, n_integrals size_buffer = min(mo_num*mo_num*mo_num,16000000) - double precision, allocatable :: Vtmp(:,:,:,:) + double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) if (.True.) then ! In-memory transformation - allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + call set_multiple_levels_omp(.False.) - call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & - cholesky_mo, mo_num*mo_num, & - cholesky_mo, mo_num*mo_num, 0.d0, & - Vtmp, mo_num*mo_num) - - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) + !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) allocate (buffer_i(size_buffer), buffer_value(size_buffer)) n_integrals = 0 + + allocate (Vtmp(mo_num,mo_num,mo_num)) + !$OMP DO do l=1,mo_num + + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + Vtmp, mo_num*mo_num) do k=1,l do j=1,mo_num do i=1,j - if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then + if (abs(Vtmp(i,j,k)) > mo_integrals_threshold) then n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k,l) + buffer_value(n_integrals) = Vtmp(i,j,k) !DIR$ FORCEINLINE call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) if (n_integrals == size_buffer) then @@ -503,10 +506,9 @@ subroutine add_integrals_to_map_cholesky enddo !$OMP END DO call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value) + deallocate(buffer_i, buffer_value, Vtmp) !$OMP END PARALLEL - deallocate(Vtmp) call map_unique(mo_integrals_map) endif @@ -1350,16 +1352,29 @@ END_PROVIDER ! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij END_DOC - integer :: i,j + integer :: i,j,k double precision :: get_two_e_integral if (do_ao_cholesky) then + double precision, allocatable :: buffer(:,:) + allocate (buffer(cholesky_ao_num,mo_num)) + do k=1,cholesky_ao_num + do i=1,mo_num + buffer(k,i) = cholesky_mo_transp(k,i,i) + enddo + enddo + call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, & + buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num) + deallocate(buffer) + do j=1,mo_num do i=1,mo_num - !TODO: use dgemm - mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) - mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + mo_two_e_integrals_jj_exchange(i,j) = 0.d0 + do k=1,cholesky_ao_num + mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & + cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) + enddo enddo enddo From 5a0c8de5a39390a63b29d2748fa6a92cb00107ea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 5 Jul 2023 19:12:03 +0200 Subject: [PATCH 214/337] Accelerated cholesky AO-MO transformation --- src/mo_two_e_ints/cholesky.irp.f | 38 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 32c0dccd..7cfbaa58 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -4,16 +4,18 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num ! Cholesky vectors in MO basis END_DOC - integer :: k + integer :: k, i, j call set_multiple_levels_omp(.False.) - print *, 'AO->MO Transformation of Cholesky vectors' !$OMP PARALLEL DO PRIVATE(k) do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) + do j=1,mo_num + do i=1,mo_num + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) + enddo + enddo enddo !$OMP END PARALLEL DO - print *, '' END_PROVIDER @@ -23,27 +25,15 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, ! Cholesky vectors in MO basis END_DOC - integer :: i,j,k - double precision, allocatable :: buffer(:,:) + double precision, allocatable :: X(:,:,:) + print *, 'AO->MO Transformation of Cholesky vectors' - print *, 'AO->MO Transformation of Cholesky vectors .' - - call set_multiple_levels_omp(.False.) - !$OMP PARALLEL PRIVATE(i,j,k,buffer) - allocate(buffer(mo_num,mo_num)) - !$OMP DO SCHEDULE(static) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) - do j=1,mo_num - do i=1,mo_num - cholesky_mo_transp(k,i,j) = buffer(i,j) - enddo - enddo - enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL - print *, '' + allocate(X(mo_num,cholesky_ao_num,ao_num)) + call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) + call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num) + deallocate(X) END_PROVIDER From e82220a6a414bd20eac08d5bca584ad0fb315495 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 02:12:42 +0200 Subject: [PATCH 215/337] Working on Cholesky CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 26 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 1395 +++++++++++++++++++++++ src/mo_two_e_ints/mo_bi_integrals.irp.f | 81 +- src/tools/four_idx_transform.irp.f | 7 + src/utils/fortran_mmap.c | 14 +- src/utils/mmap.f90 | 25 +- src/utils_cc/mo_integrals_cc.irp.f | 85 +- 7 files changed, 1542 insertions(+), 91 deletions(-) create mode 100644 src/ccsd/ccsd_space_orb_sub_chol.irp.f diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 1d77180e..76c9351e 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -85,13 +85,23 @@ subroutine run_ccsd_space_orb do while (not_converged) - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) - ! Residue - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) +! if (do_ao_cholesky) then + if (.False.) then + call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo_chol(nO,nV,t1,t2,H_vo) + + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + else + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + endif max_r = max(max_r1,max_r2) ! Update @@ -839,6 +849,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! allocate(B1(nV,nV,nV,nV)) ! call compute_B1(nO,nV,t1,t2,B1) +! call dgemm('N','N',nO*nO,nV*nV,nV*nV, & +! 1d0, tau, size(tau,1) * size(tau,2), & +! B1 , size(B1_gam,1) * size(B1_gam,2), & +! 1d0, r2, size(r2,1) * size(r2,2)) allocate(B1_gam(nV,nV,nV)) do gam=1,nV call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f new file mode 100644 index 00000000..190c163b --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -0,0 +1,1395 @@ +subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space_chol(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) + allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & + !$omp private(b,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nV,nO*nV*nV, & + 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & + W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_vvov,T_vvoo) + + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + do u = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do i = 1, nO + W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + max_r1 = max(dabs(r1(i,a)), max_r1) + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + + ! H_oo(u,i) = cc_space_f_oo(u,i) + !$omp parallel & + !$omp shared(nO,H_oo,cc_space_f_oo) & + !$omp private(i,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do + !$omp end parallel + + ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) + ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) + call dgemm('N','T', nO, nO, nO*nV*nV, & + 1d0, tau , size(tau,1), & + cc_space_w_oovv, size(cc_space_w_oovv,1), & + 1d0, H_oo , size(H_oo,1)) + +end + +! H_vv + +subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + double precision, allocatable :: tmp_tau(:,:,:,:) + + allocate(tmp_tau(nV,nO,nO,nV)) + + ! H_vv(a,beta) = cc_space_f_vv(a,beta) + !$omp parallel & + !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do j = 1, nO + do i = 1, nO + do b = 1, nV + tmp_tau(b,i,j,beta) = tau(i,j,beta,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nV,nV,nO*nO*nV, & + -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & + tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & + 1d0, H_vv , size(H_vv,1)) + + deallocate(tmp_tau) + +end + +! H_vo + +subroutine compute_H_vo_chol(nO,nV,t1,t2,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + + double precision, allocatable :: w(:,:,:,:) + + allocate(w(nV,nO,nO,nV)) + + !$omp parallel & + !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & + !$omp private(a,beta,i,j,b) & + !$omp default(none) + !$omp do + do i = 1, nO + do a = 1, nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + !$omp end do nowait + + ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) + ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('N',nV*nO, nO*nV, & + 1d0, w , size(w,1) * size(w,2), & + t1 , 1, & + 1d0, H_vo, 1) + + deallocate(w) + +end + +! R2 + +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) + double precision, allocatable :: A1(:,:,:,:) + integer :: u,v,i,j,beta,gam,a,b + + allocate(g_occ(nO,nO), g_vir(nV,nV)) + allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) + allocate(A1(nO,nO,nO,nO)) + + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvvo,cc_space_v_vvoo,J1) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,cc_space_v_vvov,K1) + + ! Residual + !r2 = 0d0 + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + double precision, dimension(:,:,:,:), allocatable :: r2_chem, tmp, tau_chem + double precision, dimension(:,:,:,:), allocatable :: B1 + + allocate(B1(nV,nV,nV,nV)) + call compute_B1_chol(nO,nV,t1,B1,cholesky_ao_num) + call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1 ,1) * size(B1 ,2), & + 1d0, r2, size(r2 ,1) * size(r2 ,2)) + + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + double precision, allocatable :: X_vovv(:,:,:,:) + allocate(X_vovv(nV,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & + !$omp private(u,a,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do u = 1, nO + do a = 1, nV + X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv, size(Y_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & + !$omp private(u,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do u = 1, nO + do a = 1, nV + X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','N',nV*nO*nV,nV,nO, & + 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & + t1 , size(t1,1), & + 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) + + call dgemm('N','N',nO,nO*nV*nV,nV, & + 1d0, t1, size(t1,1), & + Y_vovv, size(Y_vovv,1), & + 0d0, X_oovv, size(X_oovv,1)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_vovv) + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vovo,Y_oovo) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + do i = 1, nO + !$omp do + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo,Y_voov) + + double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovov,Y_ovov,Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r2) & + !$omp private(i,j,a,b) & + !$omp default(none) + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + max_r2 = 0d0 + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + max_r2 = max(r2(i,j,a,b), max_r2) + enddo + enddo + enddo + enddo + + deallocate(g_occ,g_vir,J1,K1,A1) + +end + +! A1 + +subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) + allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + !$omp parallel & + !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do nowait + + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do u = 1, nO + do a = 1, nV + X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + X_vooo, size(X_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp shared(nO,nV,A1,Y_oooo) & + !$omp private(u,v,i,j) & + !$omp default(none) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_vooo,Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 1d0, A1 , size(A1,1)) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! B1 +subroutine compute_B1_chol(nO,nV,t1,B1,ldb) + + implicit none + + integer, intent(in) :: nO,nV,ldb + double precision, intent(in) :: t1(nO, nV) + double precision, intent(out) :: B1(nV, nV, nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + do gam = 1, nV + do beta = 1, nV + do b = 1, nV + do a = 1, nV + B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + do i = 1, nO + B1(a,b,beta,gam) = B1(a,b,beta,gam) & + - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & + - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) + enddo + + enddo + enddo + enddo + enddo + +end + +! g_occ + +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, g_occ, size(g_occ,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & + !$omp private(i,j,a,u) & + !$omp default(none) + !$omp do + do i = 1, nO + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + H_oo(u,i) + enddo + enddo + !$omp end do + + !$omp do + do i = 1, nO + do j = 1, nO + do a = 1, nV + do u = 1, nO + g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! g_vir + +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + !$omp parallel & + !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & + !$omp private(i,b,a,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + !$omp end do + + !$omp do + do beta = 1, nV + do i = 1, nO + do b = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! J1 + +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(X_ovoo) + + ! v_vvvo(b,a,beta,i) * t1(u,b) + call dgemm('N','N',nO,nV*nV*nO,nV, & + 1d0, t1 , size(t1,1), & + v_vvvo, size(v_vvvo,1), & + 1d0, J1 , size(J1,1)) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + do j = 1, nO + !$omp do + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + do i = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + !$omp end do + enddo + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + call dgemm('N','N',nO,nV*nO*nV,nV, & + 1d0, t1 , size(t1,1), & + v_vvov, size(v_vvov,1), & + 1d0, K1 , size(K1,1)) + + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X,Y,Z) + +end diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index b15d9745..af40e571 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -166,11 +166,9 @@ subroutine four_idx_dgemm deallocate (a1) + call map_sort(mo_integrals_map) call map_unique(mo_integrals_map) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - end subroutine subroutine add_integrals_to_map(mask_ijkl) @@ -250,7 +248,7 @@ subroutine add_integrals_to_map(mask_ijkl) call wall_time(wall_1) - size_buffer = min(ao_num*ao_num*ao_num,8000000) + size_buffer = min(ao_num*ao_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' @@ -443,11 +441,6 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END PARALLEL call map_merge(mo_integrals_map) - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - deallocate(list_ijkl) @@ -463,55 +456,55 @@ subroutine add_integrals_to_map_cholesky integer :: i,j,k,l,m integer :: size_buffer, n_integrals - size_buffer = min(mo_num*mo_num*mo_num,16000000) + size_buffer = min(mo_num*mo_num,16000000) double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) - if (.True.) then - ! In-memory transformation + call set_multiple_levels_omp(.False.) - call set_multiple_levels_omp(.False.) + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + allocate (Vtmp(mo_num,mo_num,mo_num)) + n_integrals = 0 - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) - allocate (buffer_i(size_buffer), buffer_value(size_buffer)) - n_integrals = 0 + !$OMP DO SCHEDULE(dynamic) + do l=1,mo_num + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & + cholesky_mo_transp, cholesky_ao_num, & + cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + Vtmp, mo_num*mo_num) - allocate (Vtmp(mo_num,mo_num,mo_num)) - - !$OMP DO - do l=1,mo_num - - call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & - Vtmp, mo_num*mo_num) - do k=1,l - do j=1,mo_num - do i=1,j - if (abs(Vtmp(i,j,k)) > mo_integrals_threshold) then - n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - n_integrals = 0 - endif + do k=1,l + do j=1,mo_num + do i=1,j + if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then + n_integrals = n_integrals + 1 + buffer_value(n_integrals) = Vtmp(i,j,k) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 endif - enddo + endif enddo enddo enddo - !$OMP END DO + enddo + !$OMP END DO NOWAIT + + if (n_integrals > 0) then call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value, Vtmp) - !$OMP END PARALLEL - - call map_unique(mo_integrals_map) - endif + deallocate(buffer_i, buffer_value, Vtmp) + !$OMP BARRIER + !$OMP END PARALLEL + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) end diff --git a/src/tools/four_idx_transform.irp.f b/src/tools/four_idx_transform.irp.f index 92e87cad..f7520e71 100644 --- a/src/tools/four_idx_transform.irp.f +++ b/src/tools/four_idx_transform.irp.f @@ -14,6 +14,13 @@ program four_idx_transform io_mo_two_e_integrals = 'Write' SOFT_TOUCH io_mo_two_e_integrals + if (.true.) then + PROVIDE ao_two_e_integrals_in_map + endif + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + endif if (.true.) then PROVIDE mo_two_e_integrals_in_map endif diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 52df2476..71426002 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -22,11 +22,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); } else { @@ -53,16 +49,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } if (map == MAP_FAILED) { close(fd); - printf("%s:\n", filename); + printf("%s: %lu\n", filename, bytes); perror("Error mmapping the file"); exit(EXIT_FAILURE); } diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 49147283..caabc6f1 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,7 +46,14 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length + if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) else @@ -66,7 +73,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -82,7 +95,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo +print *, 'map_length: ', length fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 2db614b4..62237229 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -53,33 +53,8 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) allocate(buffer(n1,n3,n2,n4)) - !$OMP PARALLEL PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k) - !$OMP DO - do i3=1,n3 - idx3 = list3(i3) - do i1=1,n1 - idx1 = list1(i1) - do k=1,cholesky_ao_num - v1(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP DO - do i4=1,n4 - idx4 = list4(i4) - do i2=1,n2 - idx2 = list2(i2) - do k=1,cholesky_ao_num - v2(k,i2,i4) = cholesky_mo_transp(k,idx2,idx4) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP BARRIER - !$OMP END PARALLEL + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num) call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & v1, cholesky_ao_num, & @@ -129,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) end +subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) + + implicit none + + integer, intent(in) :: n1,n3,ldv + integer, intent(in) :: list1(n1),list3(n3) + double precision, intent(out) :: v(ldv,n1,n3) + + integer :: i1,i3,idx1,idx3,k + + !$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k) + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_ao_num + v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] @@ -345,6 +344,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] From a2c4a74d926e0017b701d1f6f510b2bf9a751f74 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 16:21:37 +0200 Subject: [PATCH 216/337] Fixed writing MOs for large sizes' --- src/ao_two_e_ints/cholesky.irp.f | 12 ++++++++++-- src/mo_two_e_ints/cholesky.irp.f | 6 +++++- src/mo_two_e_ints/mo_bi_integrals.irp.f | 13 ++++++++++++- src/utils/map_functions.irp.f | 4 ++++ src/utils/mmap.f90 | 3 --- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f4746144..ce05de5b 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -339,8 +339,16 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] call omp_destroy_lock(lock(k)) enddo - allocate(cholesky_ao(ao_num,ao_num,rank)) - call dcopy(ndim*rank, L, 1, cholesky_ao, 1) + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO deallocate(L) cholesky_ao_num = rank diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 7cfbaa58..3a868cbe 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -26,9 +26,13 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, END_DOC double precision, allocatable :: X(:,:,:) + integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' - allocate(X(mo_num,cholesky_ao_num,ao_num)) + allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index af40e571..0ed6f373 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -90,6 +90,10 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif + allocate (a1(ao_num,ao_num,ao_num,ao_num)) print *, 'Getting AOs' @@ -103,6 +107,7 @@ subroutine four_idx_dgemm enddo !$OMP END PARALLEL DO + print *, '1st transformation' ! 1st transformation allocate (a2(ao_num,ao_num,ao_num,mo_num)) @@ -456,7 +461,7 @@ subroutine add_integrals_to_map_cholesky integer :: i,j,k,l,m integer :: size_buffer, n_integrals - size_buffer = min(mo_num*mo_num,16000000) + size_buffer = min(mo_num*mo_num*mo_num,16000000) double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) @@ -575,6 +580,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -850,6 +858,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index cd3b28a8..97d0e8bf 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map) integer*8 :: n_elements n_elements = int(map % n_elements,8) + if (n_elements <= 0) then + print *, 'Unable to write map to disk: n_elements = ', n_elements + stop -1 + endif if (map % consolidated) then diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index caabc6f1..41e60224 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -52,7 +52,6 @@ module mmap_module do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) @@ -79,7 +78,6 @@ print *, 'map_length: ', length do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -101,7 +99,6 @@ print *, 'map_length: ', length do i=1,size(shape) length = length * shape(i) enddo -print *, 'map_length: ', length fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine From e35f847341cf6094886cd675d5bf37b8e752c652 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 6 Jul 2023 17:51:59 +0200 Subject: [PATCH 217/337] Enabled direct integrals in Cholesky --- src/ao_two_e_ints/cholesky.irp.f | 43 +++++++++++++++++++------ src/ao_two_e_ints/two_e_integrals.irp.f | 3 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 4 ++- src/tools/four_idx_transform.irp.f | 7 ---- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index ce05de5b..f7eae638 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -43,11 +43,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero + double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr integer(omp_lock_kind), allocatable :: lock(:) + PROVIDE nucl_coord - PROVIDE ao_two_e_integrals_in_map + if (.not.do_direct_integrals) then + PROVIDE ao_two_e_integrals_in_map + endif deallocate(cholesky_ao) ndim = ao_num*ao_num @@ -85,13 +89,22 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo enddo - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) - enddo - !$OMP END PARALLEL DO + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif Dmax = maxval(D) @@ -196,8 +209,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] if ((0 < q).and.(q < k)) cycle if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & + if (do_direct_integrals) then + Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), & + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif if (q /= 0) Delta(q,m) = Delta(k,m) endif enddo @@ -218,8 +236,13 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] if ((0 < q).and.(q < p)) cycle if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & + if (do_direct_integrals) then + Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), & + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif if (q /= 0) Delta(q,m) = Delta(p,m) if (j /= 0) Delta(p,j) = Delta(p,m) if (q*j /= 0) Delta(q,j) = Delta(p,m) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 85ff5bcf..0c70aae5 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -1232,7 +1232,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) logical, external :: ao_two_e_integral_zero integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + double precision, external :: ao_two_e_integral + double precision :: cpu_1,cpu_2, wall_1, wall_2 double precision :: integral, wall_0 double precision :: thr integer :: kk, m, j1, i1 diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0ed6f373..0d3fe176 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) print*, 'MO integrals provided' return - else + endif + + if (.not. do_direct_integrals) then PROVIDE ao_two_e_integrals_in_map endif diff --git a/src/tools/four_idx_transform.irp.f b/src/tools/four_idx_transform.irp.f index f7520e71..92e87cad 100644 --- a/src/tools/four_idx_transform.irp.f +++ b/src/tools/four_idx_transform.irp.f @@ -14,13 +14,6 @@ program four_idx_transform io_mo_two_e_integrals = 'Write' SOFT_TOUCH io_mo_two_e_integrals - if (.true.) then - PROVIDE ao_two_e_integrals_in_map - endif - if (do_ao_cholesky) then - PROVIDE cholesky_mo_transp - FREE cholesky_ao - endif if (.true.) then PROVIDE mo_two_e_integrals_in_map endif From 905d88529f1b3a2711951cd59585dbea2b398fea Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 17:42:20 +0200 Subject: [PATCH 218/337] Reduced memory in cholesky SCF --- src/ao_two_e_ints/cholesky.irp.f | 68 ++++++++++++++----- src/ao_two_e_ints/two_e_integrals.irp.f | 90 +++++++++++++++++++++---- src/ccsd/ccsd_t_space_orb_abc.irp.f | 2 +- src/determinants/density_matrix.irp.f | 4 +- src/determinants/h_apply.irp.f | 2 +- src/determinants/s2.irp.f | 2 +- src/hartree_fock/fock_matrix_hf.irp.f | 90 ++++++++++++++++--------- src/utils/memory.irp.f | 2 +- 8 files changed, 193 insertions(+), 67 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index f7eae638..d0fa735d 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] double precision, pointer :: L(:,:), L_old(:,:) - double precision, parameter :: s = 1.d-1 + double precision :: s double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) @@ -47,6 +47,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] integer :: block_size, iblock, ierr integer(omp_lock_kind), allocatable :: lock(:) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + PROVIDE nucl_coord if (.not.do_direct_integrals) then @@ -57,6 +62,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] ndim = ao_num*ao_num tau = ao_cholesky_threshold + rss = 6.d0 * memory_of_double(ndim) + & + 6.d0 * memory_of_int(ndim) + call check_mem(rss, irp_here) allocate(L(ndim,1)) @@ -97,7 +105,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP END PARALLEL DO else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & addr(2,i), addr(2,i), & @@ -130,21 +138,49 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] ! a. i = i+1 - ! b. - Dmin = max(s*Dmax,tau) + logical :: memory_ok + memory_ok = .False. - ! c. - nq=0 - LDmap = 0 - DLmap = 0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p + s = 1.d-2 + + ! Inrease s until the arrays fit in memory + do + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + LDmap = 0 + DLmap = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p + endif + enddo + + call resident_memory(rss) + rss = rss & + + np*memory_of_double(nq) & ! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + ! Ltmp_q(nq,block_size) + + if (rss > qp_max_mem) then + s = s*2.d0 + else + exit endif + + if (nq == 0) then + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + enddo ! d., e. @@ -198,7 +234,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP END DO - !$OMP DO SCHEDULE(dynamic,8) + !$OMP DO SCHEDULE(guided) do m=1,nq call omp_set_lock(lock(m)) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 0c70aae5..f86fb269 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(dynamic) + !$OMP SCHEDULE(guided) do i=1,ao_num do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) @@ -975,7 +975,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -998,7 +999,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) endif ny=0 @@ -1017,7 +1019,8 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1057,7 +1060,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) ny=0 @@ -1069,7 +1073,8 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end @@ -1098,7 +1103,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) nx = nd !DIR$ LOOP COUNT(8) @@ -1118,7 +1124,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) ny=0 !DIR$ LOOP COUNT(8) @@ -1130,7 +1137,8 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) ! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1177,9 +1185,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) -! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) return @@ -1199,7 +1207,8 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) ! !DIR$ FORCEINLINE ! call multiply_poly(X,nx,B_01,2,d,nd) - call multiply_poly_c2(X,nx,B_01,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) ny = 0 !DIR$ LOOP COUNT(6) @@ -1208,9 +1217,9 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) -! !DIR$ FORCEINLINE ! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) + !DIR$ FORCEINLINE + call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) end select end @@ -1300,3 +1309,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd) end +!DIR$ FORCEINLINE +subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 1aab6bd7..12a71045 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do a = 1, nV do b = a+1, nV do c = b+1, nV diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f index 1a1d92b5..ce4d96c2 100644 --- a/src/determinants/density_matrix.irp.f +++ b/src/determinants/density_matrix.irp.f @@ -117,7 +117,7 @@ END_PROVIDER !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) ) tmp_a = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_a=1,N_det krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) @@ -173,7 +173,7 @@ END_PROVIDER deallocate(tmp_a) tmp_b = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_b=1,N_det krow = psi_bilinear_matrix_transp_rows(k_b) ASSERT (krow <= N_det_alpha_unique) diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index 078c2104..65f1a832 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates) enddo !$OMP END DO - !$OMP DO schedule(dynamic,1024) + !$OMP DO schedule(guided,64) do i=1,N_det-1 if (duplicate(i)) then cycle diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index 2c1a8757..6dc49526 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)& !$OMP REDUCTION(+:accu) allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do i = n,1,-1 ! Better OMP scheduling call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj) diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 8c6658c5..a5ab6a60 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -190,47 +190,75 @@ END_PROVIDER deallocate(X) - ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + if (elec_alpha_num > elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif - allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + double precision :: rss + double precision :: memory_of_double + integer :: iblock + integer, parameter :: block_size = 32 + + rss = memory_of_double(ao_num*ao_num) + call check_mem(2.d0*block_size*rss, irp_here) + allocate(X2(ao_num,ao_num,block_size,2)) + allocate(X3(ao_num,block_size,ao_num,2)) + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_alpha, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,1), ao_num) + do iblock=1,cholesky_ao_num,block_size - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_beta, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,2), ao_num) + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,1), ao_num) - allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + else + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + enddo + enddo + enddo + endif + + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,1), ao_num*block_size, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,2), ao_num*block_size, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + endif - do s=1,ao_num - do j=1,cholesky_ao_num - do m=1,ao_num - X3(m,j,s,1) = X2(m,s,j,1) - X3(m,j,s,2) = X2(m,s,j,2) - enddo - enddo enddo - deallocate(X2) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_alpha_chol, ao_num) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_beta_chol, ao_num) - - deallocate(X3) + if (elec_alpha_num == elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif + deallocate(X2,X3) END_PROVIDER diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 115b2cbe..0cd2133e 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -5,7 +5,7 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] END_DOC character*(128) :: env - qp_max_mem = 2000 + qp_max_mem = 500 call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() From 4237fa888f0f537f113825557a6f0c38c2efeaff Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 19:05:46 +0200 Subject: [PATCH 219/337] Get total memory --- src/ao_two_e_ints/cholesky.irp.f | 15 +++++++++---- src/utils/memory.irp.f | 36 +++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index d0fa735d..4702c850 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -141,7 +141,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] logical :: memory_ok memory_ok = .False. - s = 1.d-2 + s = 0.1d0 ! Inrease s until the arrays fit in memory do @@ -176,7 +176,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] exit endif - if (nq == 0) then + if ((s > 1.d0).or.(nq == 0)) then print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif @@ -219,10 +219,15 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] stop -1 endif - Delta(:,:) = 0.d0 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + !$OMP DO + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP ENDDO NOWAIT + !$OMP DO do k=1,N do p=1,np @@ -232,7 +237,9 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] Ltmp_q(q,k) = L(Dset(q),k) enddo enddo - !$OMP END DO + !$OMP END DO NOWAIT + + !$OMP BARRIER !$OMP DO SCHEDULE(guided) do m=1,nq diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 0cd2133e..7da283ec 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] ! Maximum memory in Gb END_DOC character*(128) :: env + integer, external :: get_total_available_memory - qp_max_mem = 500 + qp_max_mem = get_total_available_memory() + call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() @@ -122,3 +124,35 @@ subroutine print_memory_usage() '.. >>>>> [ RES MEM : ', rss , & ' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..' end + +integer function get_total_available_memory() result(res) + implicit none + BEGIN_DOC +! Returns the total available memory on the current machine + END_DOC + + character(len=128) :: line + integer :: status + integer :: iunit + integer*8, parameter :: KB = 1024 + integer*8, parameter :: GiB = 1024**3 + integer, external :: getUnitAndOpen + + iunit = getUnitAndOpen('/proc/meminfo','r') + + res = 512 + do + read(iunit, '(A)', END=10) line + if (line(1:10) == "MemTotal: ") then + read(line(11:), *, ERR=20) res + res = int((res*KB) / GiB,4) + exit + 20 continue + end if + end do + 10 continue + close(iunit) + +end function get_total_available_memory + + From 073aef70b8891d1027f14c8a3ca21d9261a81abe Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 7 Jul 2023 21:54:06 +0200 Subject: [PATCH 220/337] Inlined function in integrals --- src/ao_two_e_ints/two_e_integrals.irp.f | 466 ++++++++++++++++++++++-- 1 file changed, 429 insertions(+), 37 deletions(-) diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index f86fb269..148ebb62 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -951,7 +951,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib ASSERT (a>2) !DIR$ LOOP COUNT(8) @@ -974,9 +974,43 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if (nx >= 0) then + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -997,10 +1031,47 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif + ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + endif ny=0 @@ -1018,9 +1089,45 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1037,7 +1144,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib if( (c<0).or.(nd<0) )then nd = -1 @@ -1059,9 +1166,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 @@ -1072,9 +1214,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end @@ -1092,7 +1269,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib !DIR$ LOOP COUNT(8) do ix=0,n_pt_in @@ -1102,9 +1279,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1123,9 +1335,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1136,9 +1383,45 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1155,7 +1438,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) integer :: nx, ix,ny double precision :: X(0:max_dim),Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y - integer :: i + integer :: i, ib select case (c) case (0) @@ -1185,9 +1468,46 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(1) = D_00(1) Y(2) = D_00(2) -! call multiply_poly(Y,ny,D_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + return @@ -1206,9 +1526,44 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_01,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(2) * X(0) + + case (1) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(2) * X(1) + + case (2) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(0) * X(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(1) * X(2) + B_01(2) * X(1) + d(4) = d(4) + B_01(2) * X(2) + + case default + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_01(0) * X(ib) + B_01(1) * X(ib-1) + B_01(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_01(1) * X(nx) + B_01(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_01(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1217,9 +1572,46 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) -! call multiply_poly(Y,ny,D_00,2,d,nd) - !DIR$ FORCEINLINE - call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) +! !DIR$ FORCEINLINE +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end select end From 8c4a7226cdb7b594528db269cdb39c1d556b8bc6 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 11:32:06 +0200 Subject: [PATCH 221/337] minor changes --- src/casscf_cipsi/casscf.irp.f | 10 +++++----- src/casscf_cipsi/save_energy.irp.f | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index a2f3c5a7..02954ebf 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -17,14 +17,14 @@ end subroutine run implicit none double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E - logical :: converged,state_following_casscf_save + logical :: converged,state_following_casscf_cipsi_save integer :: iteration converged = .False. energy = 0.d0 mo_label = "MCSCF" iteration = 1 - state_following_casscf_save = state_following_casscf + state_following_casscf_cipsi_save = state_following_casscf state_following_casscf = .True. touch state_following_casscf ept2_before = 0.d0 @@ -44,8 +44,8 @@ subroutine run call write_double(6,energy,'CAS-SCF energy = ') if(n_states == 1)then double precision :: E_PT2, PT2 - call ezfio_get_casscf_energy_pt2(E_PT2) - call ezfio_get_casscf_energy(PT2) + call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) + call ezfio_get_casscf_cipsi_energy(PT2) PT2 -= E_PT2 call write_double(6,E_PT2,'E + PT2 energy = ') call write_double(6,PT2,' PT2 = ') @@ -98,7 +98,7 @@ subroutine run SOFT_TOUCH pt2_max endif if(iteration .gt. 3)then - state_following_casscf = state_following_casscf_save + state_following_casscf = state_following_casscf_cipsi_save soft_touch state_following_casscf endif endif diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f index 8729c5af..18750c3d 100644 --- a/src/casscf_cipsi/save_energy.irp.f +++ b/src/casscf_cipsi/save_energy.irp.f @@ -4,6 +4,6 @@ subroutine save_energy(E,pt2) ! Saves the energy in |EZFIO|. END_DOC double precision, intent(in) :: E(N_states), pt2(N_states) - call ezfio_set_casscf_energy(E(1:N_states)) - call ezfio_set_casscf_energy_pt2(E(1:N_states)+pt2(1:N_states)) + call ezfio_set_casscf_cipsi_energy(E(1:N_states)) + call ezfio_set_casscf_cipsi_energy_pt2(E(1:N_states)+pt2(1:N_states)) end From 9ce6eb78c84802739286092b1f3860d981bac361 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 12:40:56 +0200 Subject: [PATCH 222/337] Cholesky in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 99 ++++++++++++-- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 173 +++++++++++++------------ 2 files changed, 178 insertions(+), 94 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 76c9351e..04b7e955 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -9,7 +9,7 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) @@ -18,8 +18,6 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa -! PROVIDE mo_two_e_integrals_in_map - det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) @@ -46,6 +44,7 @@ subroutine run_ccsd_space_orb allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) + allocate(tau_x(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) @@ -67,10 +66,11 @@ subroutine run_ccsd_space_orb call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -86,11 +86,11 @@ subroutine run_ccsd_space_orb do while (not_converged) ! Residue -! if (do_ao_cholesky) then - if (.False.) then - call compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo_chol(nO,nV,t1,t2,H_vo) + if (do_ao_cholesky) then +! if (.False.) then + call compute_H_oo_chol(nO,nV,tau_x,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,H_vv) + call compute_H_vo_chol(nO,nV,t1,H_vo) call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) @@ -119,9 +119,10 @@ subroutine run_ccsd_space_orb endif call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 @@ -249,6 +250,51 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) end +subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau_x(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau_x,t1,& + !$omp cc_space_f_vo,cc_space_v_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + ! Tau subroutine update_tau_space(nO,nV,t1,t2,tau) @@ -284,6 +330,39 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) end +subroutine update_tau_x_space(nO,nV,tau,tau_x) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau_x(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,tau_x) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + ! R1 subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 190c163b..0b9e123e 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -276,64 +276,88 @@ end ! H_oo -subroutine compute_H_oo_chol(nO,nV,t1,t2,tau,H_oo) +subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(in) :: tau_x(nO, nO, nV, nV) double precision, intent(out) :: H_oo(nO, nO) - integer :: a,tmp_a,k,b,l,c,d,tmp_c,tmp_d,i,j,u + integer :: a,b,i,j,u,k - ! H_oo(u,i) = cc_space_f_oo(u,i) + double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + + allocate(tau_kau(cholesky_ao_num,nV,nO)) !$omp parallel & - !$omp shared(nO,H_oo,cc_space_f_oo) & - !$omp private(i,u) & - !$omp default(none) + !$omp default(shared) & + !$omp private(i,u,j,k,a,b,tmp_vov) + allocate(tmp_vov(nV,nO,nV) ) + !$omp do + do u = 1, nO + do b=1,nV + do j=1,nO + do a=1,nV + tmp_vov(a,j,b) = tau_x(u,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_ao_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_ao_num) + enddo + !$omp end do nowait + deallocate(tmp_vov) !$omp do do i = 1, nO do u = 1, nO H_oo(u,i) = cc_space_f_oo(u,i) enddo enddo - !$omp end do - !$omp end parallel - - ! H_oo(u,i) += cc_space_w_oovv(i,j,a,b) * tau(u,j,a,b) - ! H_oo(u,i) += tau(u,j,a,b) * cc_space_w_oovv(i,j,a,b) - call dgemm('N','T', nO, nO, nO*nV*nV, & - 1d0, tau , size(tau,1), & - cc_space_w_oovv, size(cc_space_w_oovv,1), & - 1d0, H_oo , size(H_oo,1)) + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nO, nO, cholesky_ao_num*nV, 1.d0, & + tau_kau, cholesky_ao_num*nV, cc_space_v_vo_chol, cholesky_ao_num*nV, & + 1.d0, H_oo, nO) end ! H_vv -subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) +subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) implicit none integer, intent(in) :: nO,nV - double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) - double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(in) :: tau_x(nO, nO, nV, nV) double precision, intent(out) :: H_vv(nV, nV) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + integer :: a,b,i,j,u,k, beta - double precision, allocatable :: tmp_tau(:,:,:,:) + double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) - allocate(tmp_tau(nV,nO,nO,nV)) - - ! H_vv(a,beta) = cc_space_f_vv(a,beta) + allocate(tau_kia(cholesky_ao_num,nO,nV)) !$omp parallel & - !$omp shared(nV,nO,H_vv,cc_space_f_vv,tmp_tau,tau) & - !$omp private(a,beta,i,j,b) & - !$omp default(none) + !$omp default(shared) & + !$omp private(i,beta,j,k,a,b,tmp_oov) + allocate(tmp_oov(nO,nO,nV) ) + !$omp do + do a = 1, nV + do b=1,nV + do j=1,nO + do i=1,nO + tmp_oov(i,j,b) = tau_x(i,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_ao_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_ao_num) + enddo + !$omp end do nowait + deallocate(tmp_oov) + !$omp do do beta = 1, nV do a = 1, nV @@ -341,83 +365,64 @@ subroutine compute_H_vv_chol(nO,nV,t1,t2,tau,H_vv) enddo enddo !$omp end do nowait - - !$omp do - do beta = 1, nV - do j = 1, nO - do i = 1, nO - do b = 1, nV - tmp_tau(b,i,j,beta) = tau(i,j,beta,b) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - - call dgemm('N','N',nV,nV,nO*nO*nV, & - -1d0, cc_space_w_vvoo, size(cc_space_w_vvoo,1), & - tmp_tau , size(tmp_tau,1) * size(tmp_tau,2) * size(tmp_tau,3), & - 1d0, H_vv , size(H_vv,1)) - - deallocate(tmp_tau) + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nV, nV, cholesky_ao_num*nO, -1.d0, & + tau_kia, cholesky_ao_num*nO, cc_space_v_ov_chol, cholesky_ao_num*nO, & + 1.d0, H_vv, nV) end ! H_vo - -subroutine compute_H_vo_chol(nO,nV,t1,t2,H_vo) +subroutine compute_H_vo_chol(nO,nV,t1,H_vo) implicit none integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO, nV) - double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: H_vo(nV, nO) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u, beta + integer :: a,b,i,j,u,k - double precision, allocatable :: w(:,:,:,:) - - allocate(w(nV,nO,nO,nV)) - - !$omp parallel & - !$omp shared(nV,nO,H_vo,cc_space_f_vo,w,cc_space_w_vvoo,t1) & - !$omp private(a,beta,i,j,b) & - !$omp default(none) - !$omp do - do i = 1, nO - do a = 1, nV + double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) + do i=1,nO + do a=1,nV H_vo(a,i) = cc_space_f_vo(a,i) enddo enddo - !$omp end do nowait - ! H_vo(a,i) = H_vo(a,i) + cc_space_w_vvoo(a,b,i,j) * t1(j,b) - ! H_vo(a,i) = H_vo(a,i) + w(a,i,j,b) * t1(j,b) + allocate(tmp_k(cholesky_ao_num)) + call dgemm('N', 'N', cholesky_ao_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_ao_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) - !$omp do - do b = 1, nV - do j = 1, nO - do i = 1, nO - do a = 1, nV - w(a,i,j,b) = cc_space_w_vvoo(a,b,i,j) - enddo + call dgemm('T','N',nV*nO,1,cholesky_ao_num,1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + H_vo, nV*nO) + deallocate(tmp_k) + + allocate(tmp(cholesky_ao_num,nO,nO)) + allocate(tmp2(cholesky_ao_num,nO,nO)) + + call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, tmp, cholesky_ao_num*nO) + + do i=1,nO + do j=1,nO + do k=1,cholesky_ao_num + tmp2(k,j,i) = tmp(k,i,j) enddo enddo enddo - !$omp end do - !$omp end parallel + deallocate(tmp) - call dgemv('N',nV*nO, nO*nV, & - 1d0, w , size(w,1) * size(w,2), & - t1 , 1, & - 1d0, H_vo, 1) - - deallocate(w) + call dgemm('T','N', nV, nO, cholesky_ao_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, tmp2, cholesky_ao_num*nO, & + 1.d0, H_vo, nV) end + ! R2 subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) @@ -1015,7 +1020,7 @@ subroutine compute_B1_chol(nO,nV,t1,B1,ldb) - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) enddo - + enddo enddo enddo From 2d383d09c6381f4a421d39a45e27abe36fe99133 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 12:10:53 +0200 Subject: [PATCH 223/337] routine htilde_mu_mat_opt_bi_ortho works --- src/bi_ort_ints/three_body_ijmk.irp.f | 7 ++ src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 93 +++++++++++++++++++++++++ src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 2 +- src/tc_bi_ortho/test_tc_fock.irp.f | 9 +-- src/tc_scf/fock_tc.irp.f | 24 +++---- 5 files changed, 114 insertions(+), 21 deletions(-) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 0c02e4c5..669861b7 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -86,6 +86,13 @@ tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) tmp_loc_2 = tmp_aux_2(ipoint,n) + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + enddo enddo !$OMP END DO diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 1d1b26cc..8524253a 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,3 +90,96 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_chi_array,i_H_phi_array) + use bitmasks + implicit none + BEGIN_DOC +! Computes $\langle i|H|Phi \rangle = \sum_J c^R_J \langle i | H | J \rangle$. +! +! AND $\langle Chi|H| i \rangle = \sum_J c^L_J \langle J | H | i \rangle$. +! +! CONVENTION: i_H_phi_array(0) = total matrix element, +! +! i_H_phi_array(1) = one-electron matrix element, +! +! i_H_phi_array(2) = two-electron matrix element, +! +! i_H_phi_array(3) = three-electron matrix element, +! +! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$ +! is connected. +! +! The i_H_psi_minilist is much faster but requires to build the +! minilists. + END_DOC + integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate + integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) + integer(bit_kind), intent(in) :: key(Nint,2) + double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) + double precision, intent(out) :: i_H_chi_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + + integer :: i, ii,j + double precision :: phase + integer :: exc(0:2,2,2) + double precision :: hmono, htwoe, hthree, htot + integer, allocatable :: idx(:) + + ASSERT (Nint > 0) + ASSERT (N_int == Nint) + ASSERT (Nstate > 0) + ASSERT (Ndet > 0) + ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) + + i_H_chi_array = 0.d0 + i_H_phi_array = 0.d0 + + call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) + if (Nstate == 1) then + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + i_H_chi_array(0,1) = i_H_chi_array(0,1) + coef_l(i,1)*htot + i_H_chi_array(1,1) = i_H_chi_array(1,1) + coef_l(i,1)*hmono + i_H_chi_array(2,1) = i_H_chi_array(2,1) + coef_l(i,1)*htwoe + i_H_chi_array(3,1) = i_H_chi_array(3,1) + coef_l(i,1)*hthree + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + i_H_phi_array(0,1) = i_H_phi_array(0,1) + coef_r(i,1)*htot + i_H_phi_array(1,1) = i_H_phi_array(1,1) + coef_r(i,1)*hmono + i_H_phi_array(2,1) = i_H_phi_array(2,1) + coef_r(i,1)*htwoe + i_H_phi_array(3,1) = i_H_phi_array(3,1) + coef_r(i,1)*hthree + enddo + + else + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_chi_array(0,j) = i_H_chi_array(0,j) + coef_l(i,j)*htot + i_H_chi_array(1,j) = i_H_chi_array(1,j) + coef_l(i,j)*hmono + i_H_chi_array(2,j) = i_H_chi_array(2,j) + coef_l(i,j)*htwoe + i_H_chi_array(3,j) = i_H_chi_array(3,j) + coef_l(i,j)*hthree + enddo + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_phi_array(0,j) = i_H_phi_array(0,j) + coef_r(i,j)*htot + i_H_phi_array(1,j) = i_H_phi_array(1,j) + coef_r(i,j)*hmono + i_H_phi_array(2,j) = i_H_phi_array(2,j) + coef_r(i,j)*htwoe + i_H_phi_array(3,j) = i_H_phi_array(3,j) + coef_r(i,j)*hthree + enddo + enddo + + endif + +end + diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 35abbbc4..cb33d343 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -184,7 +184,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) +! !ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index 182c03d7..f1a7cc0a 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -152,9 +152,7 @@ subroutine routine_tot() ! do i = 1, elec_num_tab(s1) ! do a = elec_num_tab(s1)+1, mo_num ! virtual do i = 1, elec_beta_num - do a = elec_beta_num+1, elec_alpha_num! virtual -! do i = elec_beta_num+1, elec_alpha_num -! do a = elec_alpha_num+1, mo_num! virtual + do a = elec_beta_num+1, mo_num! virtual print*,i,a det_i = ref_bitmask @@ -167,7 +165,7 @@ subroutine routine_tot() call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij - if(dabs(htilde_ij).lt.1.d-10)cycle +! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' call debug_det(det_i, N_int) @@ -184,9 +182,12 @@ subroutine routine_tot() ! endif err_ai = dabs(dabs(ref) - dabs(new)) if(err_ai .gt. 1d-7) then + print*,'---------' print*,'s1 = ',s1 print*, ' warning on', i, a print*, ref,new,err_ai + print*,hmono, htwoe, hthree + print*,'---------' endif print*, ref,new,err_ai err_tot += err_ai diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 0ae515bb..f4553f3e 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -208,10 +208,10 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] if(three_body_h_tc) then !call wall_time(tt0) - !PROVIDE fock_a_tot_3e_bi_orth - !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_a - Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + PROVIDE fock_a_tot_3e_bi_orth + Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth +! PROVIDE fock_3e_uhf_mo_a +! Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a !call wall_time(tt1) !print*, ' 3-e term:', tt1-tt0 endif @@ -241,21 +241,13 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_beta - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_b - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1)) - !deallocate(tmp) - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - !PROVIDE fock_b_tot_3e_bi_orth - !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth - PROVIDE fock_3e_uhf_mo_b - Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b + PROVIDE fock_b_tot_3e_bi_orth + Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth +! PROVIDE fock_3e_uhf_mo_b +! Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif else From 44956060e7321b8bb76a0b13e83ae254039a8fa1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 17:06:34 +0200 Subject: [PATCH 224/337] Removed vvv arrays --- src/ccsd/ccsd_space_orb_sub.irp.f | 52 +++-- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 310 ++++++++++++++----------- 2 files changed, 209 insertions(+), 153 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 04b7e955..35e14313 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -92,7 +92,7 @@ subroutine run_ccsd_space_orb call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1,H_vo) - call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) else call compute_H_oo(nO,nV,t1,t2,tau,H_oo) @@ -538,25 +538,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! enddo ! enddo !enddo + + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -570,13 +561,35 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) + + ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV @@ -1640,11 +1653,12 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) ! enddo double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & - cc_list_vir,cc_list_vir,cc_list_vir,(/ cc_list_vir(gam) /), B1) + cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir(gam), B1) !$omp parallel & diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0b9e123e..50f5f603 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -185,25 +185,15 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(X_ovov) + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -217,10 +207,30 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) @@ -450,7 +460,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) call compute_A1_chol(nO,nV,t1,t2,tau,A1) call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvvo,cc_space_v_vvoo,J1) + cc_space_v_vvoo,J1) call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & cc_space_v_ovov,cc_space_v_vvov,K1) @@ -479,15 +489,54 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) tau, size(tau,1) * size(tau,2), & 1d0, r2, size(r2,1) * size(r2,2)) - double precision, dimension(:,:,:,:), allocatable :: r2_chem, tmp, tau_chem - double precision, dimension(:,:,:,:), allocatable :: B1 + integer :: block_size, iblock, k + block_size = 16 + double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 - allocate(B1(nV,nV,nV,nV)) - call compute_B1_chol(nO,nV,t1,B1,cholesky_ao_num) - call dgemm('N','N',nO*nO,nV*nV,nV*nV, & + allocate(tmp_cc(cholesky_ao_num,nV,nV)) + call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV)) + !$OMP DO + do gam = 1, nV + do iblock = 1, nV, block_size + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & + -1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + tmp_cc(1,1,gam), cholesky_ao_num, 0.d0, tmpB1, nV*block_size) + + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, tmpB1, nV*block_size) + + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, & + tmpB1, nV*block_size) + + do beta = iblock, min(nV, iblock+block_size-1) + do b = 1, nV + do a = 1, nV + B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) + enddo + enddo + enddo + + call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & 1d0, tau, size(tau,1) * size(tau,2), & B1 , size(B1 ,1) * size(B1 ,2), & - 1d0, r2, size(r2 ,1) * size(r2 ,2)) + 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + enddo + + enddo + !$OMP ENDDO + + deallocate(B1, tmpB1) + !$OMP END PARALLEL + + deallocate(tmp_cc) + double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) @@ -556,29 +605,21 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_oovv) double precision, allocatable :: X_vovv(:,:,:,:) - allocate(X_vovv(nV,nO,nV,nV)) - !$omp parallel & - !$omp shared(nO,nV,X_vovv,cc_space_v_ovvv) & - !$omp private(u,a,gam,beta) & - !$omp default(none) - !$omp do - do gam = 1, nV - do beta = 1, nV - do u = 1, nO - do a = 1, nV - X_vovv(a,u,beta,gam) = cc_space_v_ovvv(u,a,beta,gam) - enddo - enddo + allocate(X_vovv(nV,nO,nV,block_size)) + do iblock = 1, nV, block_size + do gam = iblock, min(nV, iblock+block_size-1) + call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, cc_space_v_ov_chol, & + cholesky_ao_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + enddo - enddo - !$omp end do - !$omp end parallel - - call dgemm('N','N',nO,nO*nV*nV,nV, & + call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & 1d0, t1 , size(t1,1), & X_vovv, size(X_vovv,1), & - 0d0, Y_oovv, size(Y_oovv,1)) + 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) + + enddo !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -597,38 +638,27 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - double precision, allocatable :: X_vovo(:,:,:,:), Y_vovv(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO), Y_vovv(nV,nO,nV,nV),X_oovv(nO,nO,nV,nV)) + double precision, allocatable :: X_ovvo(:,:,:,:) + double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) + allocate(tcc2(cholesky_ao_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_ao_num,nO,nV)) + + call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_ao_num*nV) + + call dgemm('N','N', cholesky_ao_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_ao_num*nO, t1, nO, & + 0.d0, tcc, cholesky_ao_num*nO) + + call dgemm('T','N', nO*nV, nV*nO, cholesky_ao_num, 1.d0, & + tcc, cholesky_ao_num, tcc2, cholesky_ao_num, 0.d0, & + X_ovvo, nO*nV) + + deallocate(tcc, tcc2) !$omp parallel & - !$omp shared(nO,nV,X_vovo,cc_space_v_ovov) & - !$omp private(u,v,gam,i) & - !$omp default(none) - do i = 1, nO - !$omp do - do gam = 1, nV - do u = 1, nO - do a = 1, nV - X_vovo(a,u,gam,i) = cc_space_v_ovov(u,a,i,gam) - enddo - enddo - enddo - !$omp end do nowait - enddo - !$omp end parallel - - call dgemm('N','N',nV*nO*nV,nV,nO, & - 1d0, X_vovo, size(X_vovo,1) * size(X_vovo,2) * size(X_vovo,3), & - t1 , size(t1,1), & - 0d0, Y_vovv, size(Y_vovv,1) * size(Y_vovv,2) * size(Y_vovv,3)) - - call dgemm('N','N',nO,nO*nV*nV,nV, & - 1d0, t1, size(t1,1), & - Y_vovv, size(Y_vovv,1), & - 0d0, X_oovv, size(X_oovv,1)) - - !$omp parallel & - !$omp shared(nO,nV,r2,X_oovv) & + !$omp shared(nO,nV,r2,X_ovvo) & !$omp private(u,v,gam,beta) & !$omp default(none) !$omp do @@ -636,7 +666,18 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do beta = 1, nV do v = 1, nO do u = 1, nO - r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(v,u,gam,beta) - X_oovv(u,v,beta,gam) + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp do + do beta = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) enddo enddo enddo @@ -644,7 +685,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_vovo,Y_vovv) + deallocate(X_ovvo) + !----- + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV,nV,nO, & 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & @@ -668,7 +711,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - double precision, allocatable :: Y_oovo(:,:,:,:) + double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) !$omp parallel & @@ -717,7 +760,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_vovo,Y_oovo) - double precision, allocatable :: X_ovvo(:,:,:,:), Y_voov(:,:,:,:), Z_ovov(:,:,:,:) + double precision, allocatable :: Y_voov(:,:,:,:), Z_ovov(:,:,:,:) allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) !$omp parallel & !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & @@ -772,8 +815,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_ovvo,Y_voov) - double precision, allocatable :: X_ovov(:,:,:,:),Y_ovov(:,:,:,:) + double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & !$omp private(u,a,i,beta,gam) & @@ -998,36 +1042,6 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) end -! B1 -subroutine compute_B1_chol(nO,nV,t1,B1,ldb) - - implicit none - - integer, intent(in) :: nO,nV,ldb - double precision, intent(in) :: t1(nO, nV) - double precision, intent(out) :: B1(nV, nV, nV, nV) - - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam - - do gam = 1, nV - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) - - do i = 1, nO - B1(a,b,beta,gam) = B1(a,b,beta,gam) & - - cc_space_v_vvvo(a,b,beta,i) * t1(i,gam) & - - cc_space_v_vvov(a,b,i,gam) * t1(i,beta) - enddo - - enddo - enddo - enddo - enddo - -end - ! g_occ subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) @@ -1091,44 +1105,52 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) t1 , size(t1,1), & 0d0, g_vir, size(g_vir,1)) - !$omp parallel & - !$omp shared(nO,nV,g_vir,H_vv, cc_space_v_vvvo,t1) & - !$omp private(i,b,a,beta) & - !$omp default(none) - !$omp do + double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) + allocate(tmp_k(cholesky_ao_num)) + call dgemm('N','N', cholesky_ao_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num, t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + + call dgemm('T','N', nV*nV, 1, cholesky_ao_num, 2.d0, & + cc_space_v_vv_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + g_vir, nV*nV) + deallocate(tmp_k) + + allocate(tmp_vo(cholesky_ao_num,nV,nO)) + call dgemm('N','T',cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_ao_num*nV) + + allocate(tmp_vo2(cholesky_ao_num,nO,nV)) + do beta=1,nV + do i=1,nO + do k=1,cholesky_ao_num + tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) + enddo + enddo + enddo + deallocate(tmp_vo) + do beta = 1, nV do a = 1, nV g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) enddo enddo - !$omp end do - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - g_vir(a,beta) = g_vir(a,beta) + (2d0 * cc_space_v_vvvo(a,b,beta,i) - cc_space_v_vvvo(b,a,beta,i)) * t1(i,b) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call dgemm('T','N', nV, nV, nO*cholesky_ao_num, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, & + tmp_vo2, cholesky_ao_num*nO, 1.d0, g_vir, nV) end ! J1 -subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) - +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) implicit none integer, intent(in) :: nO,nV double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) - double precision, intent(in) :: v_vvvo(nV,nV,nV,nO), v_vvoo(nV,nV,nO,nO) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) double precision, intent(out) :: J1(nO, nV, nV, nO) integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam @@ -1188,11 +1210,31 @@ subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvvo,v_vvoo,J1) !$omp end parallel deallocate(X_ovoo) - ! v_vvvo(b,a,beta,i) * t1(u,b) - call dgemm('N','N',nO,nV*nV*nO,nV, & - 1d0, t1 , size(t1,1), & - v_vvvo, size(v_vvvo,1), & - 1d0, J1 , size(J1,1)) + double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) + allocate(tmp_cc(cholesky_ao_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + + call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num*nV, & + t1, nO, & + 0.d0, tmp_cc, cholesky_ao_num*nV) + + call dgemm('T','N', nV*nO, nV*nO, cholesky_ao_num, 1.d0, & + tmp_cc, cholesky_ao_num, cc_space_v_vo_chol, cholesky_ao_num, & + 0.d0, J1_tmp, nV*nO) + + deallocate(tmp_cc) + + do i=1,nO + do b=1,nV + do a=1,nV + do u=1,nO + J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) + enddo + enddo + enddo + enddo + + deallocate(J1_tmp) !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) From 8729a7ca451bde897be57d61358643bb3cd84229 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Jul 2023 23:24:12 +0200 Subject: [PATCH 225/337] inactive --> virtual one-e term gradient ok --- src/casscf_tc_bi/grad_dm.irp.f | 78 +++++++++++++++++++++++ src/casscf_tc_bi/grad_old.irp.f | 109 ++++++++++++++++++++++++++++++++ src/casscf_tc_bi/gradient.irp.f | 94 +++++++++++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 src/casscf_tc_bi/grad_dm.irp.f create mode 100644 src/casscf_tc_bi/grad_old.irp.f create mode 100644 src/casscf_tc_bi/gradient.irp.f diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f new file mode 100644 index 00000000..0fc2e4eb --- /dev/null +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -0,0 +1,78 @@ + BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + implicit none + integer :: ii,tt,aa,indx + integer :: i,t,a,fff + double precision :: res_l(0:3), res_r(0:3) + gradvec_tc_l = 0.d0 + gradvec_tc_r = 0.d0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx = mat_idx_c_a(i,t) + call gradvec_tc_it(ii,tt,res_l) + call gradvec_tc_it(tt,ii,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + indx = mat_idx_c_v(i,a) + aa=list_virt(a) + call gradvec_tc_ia(ii,aa,res_l) + call gradvec_tc_ia(aa,ii,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx = mat_idx_a_v(i,a) +! gradvec_tc_l(indx)=gradvec_ta(t,a) + end do + end do +END_PROVIDER + +subroutine gradvec_tc_ia(i,a,res) + implicit none + BEGIN_DOC +! doubly occupied --> virtual TC gradient +! +! Corresponds to + END_DOC + integer, intent(in) :: i,a + double precision, intent(out) :: res(0:3) + res = 0.d0 + res(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + +end + +subroutine gradvec_tc_it(i,t,res) + implicit none + BEGIN_DOC +! doubly occupied --> active TC gradient +! +! Corresponds to + END_DOC + integer, intent(in) :: i,t + double precision, intent(out) :: res(0:3) + integer :: rr,r,ss,s + double precision :: dm + res = 0.d0 + res(1) = -2 * mo_bi_ortho_tc_one_e(i,t) + do rr = 1, n_act_orb + r = list_act(rr) + dm = tc_transition_matrix_mo(t,r,1,1) + res(1) += mo_bi_ortho_tc_one_e(i,r) * dm + enddo + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f new file mode 100644 index 00000000..6610dee3 --- /dev/null +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -0,0 +1,109 @@ + + BEGIN_PROVIDER [real*8, gradvec_detail_right_old, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_detail_left_old, (0:3,nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate,ll + real*8 :: res_l(0:3), res_r(0:3) + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_c_v(ii,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo +! do indx=1,nMonoEx +! ihole=excit(1,indx) +! ipart=excit(2,indx) +! call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) +! do ll = 0, 3 +! gradvec_detail_left_old (ll,indx)=res_l(ll) +! gradvec_detail_right_old(ll,indx)=res_r(ll) +! enddo +! end do + + real*8 :: norm_grad_left, norm_grad_right + norm_grad_left=0.d0 + norm_grad_right=0.d0 + do indx=1,nMonoEx + norm_grad_left+=gradvec_detail_left_old(0,indx)*gradvec_detail_left_old(0,indx) + norm_grad_right+=gradvec_detail_right_old(0,indx)*gradvec_detail_right_old(0,indx) + end do + norm_grad_left=sqrt(norm_grad_left) + norm_grad_right=sqrt(norm_grad_right) +! if (bavard) then + write(6,*) + write(6,*) ' Norm of the LEFT orbital gradient (via <0|EH|0>) : ', norm_grad_left + write(6,*) ' Norm of the RIGHT orbital gradient (via <0|HE|0>) : ', norm_grad_right + write(6,*) +! endif + + +END_PROVIDER + +subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate res_l = , and res_r = + ! q=hole, p=particle + ! res_l(0) = total matrix element + ! res_l(1) = one-electron part + ! res_l(2) = two-electron part + ! res_l(3) = three-electron part + END_DOC + implicit none + integer, intent(in) :: ihole,ipart + double precision, intent(out) :: res_l(0:3), res_r(0:3) + integer :: mu,iii,ispin,ierr,nu,istate,ll + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res_l=0.D0 + res_r=0.D0 + +! print*,'in i_h_psi' +! print*,ihole,ipart + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + + call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & + ,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) +! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) + do istate=1,N_states + do ll = 0,3 + res_l(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + res_r(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + enddo + end do + end if + end do + end do + + ! state-averaged gradient + res_l*=1.d0/dble(N_states) + res_r*=1.d0/dble(N_states) + +end + diff --git a/src/casscf_tc_bi/gradient.irp.f b/src/casscf_tc_bi/gradient.irp.f new file mode 100644 index 00000000..630bd891 --- /dev/null +++ b/src/casscf_tc_bi/gradient.irp.f @@ -0,0 +1,94 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + +! if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do +! end if + +END_PROVIDER From b4a2e9bd7648bb11f3dc379cd33673fe74a65102 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 23:32:05 +0200 Subject: [PATCH 226/337] Fixed cholesky for tiny thresholds --- src/ao_two_e_ints/cholesky.irp.f | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 4702c850..128aa483 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -134,17 +134,14 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] i = 0 ! 5. - do while (Dmax > tau) + do while ( (Dmax > tau).and.(rank < ndim) ) ! a. i = i+1 - logical :: memory_ok - memory_ok = .False. - s = 0.1d0 ! Inrease s until the arrays fit in memory - do + do ! b. Dmin = max(s*Dmax,tau) @@ -153,6 +150,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] nq=0 LDmap = 0 DLmap = 0 + Dset_rev = 0 do p=1,np if ( D(Lset(p)) > Dmin ) then nq = nq+1 @@ -180,7 +178,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif - + enddo ! d., e. @@ -197,11 +195,11 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] do k=1,rank L(:,k) = L_old(:,k) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO deallocate(L_old) - allocate(Delta(np,nq), stat=ierr) + allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 @@ -228,7 +226,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] enddo !$OMP ENDDO NOWAIT - !$OMP DO + !$OMP DO do k=1,N do p=1,np Ltmp_p(p,k) = L(Lset(p),k) @@ -364,7 +362,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - + !$OMP END PARALLEL Qmax = D(Dset(1)) @@ -381,7 +379,7 @@ BEGIN_PROVIDER [ integer, cholesky_ao_num ] deallocate(Ltmp_q, stat=ierr) ! i. - N = N+j + N = rank ! j. Dmax = D(Lset(1)) From 326dbe77408eecd626e57608aeb43d9f5d597114 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 10 Jul 2023 23:32:43 +0200 Subject: [PATCH 227/337] Removed vvov --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 50f5f603..b804792f 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1409,11 +1409,23 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) t1 , size(t1,1), & 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) - call dgemm('N','N',nO,nV*nO*nV,nV, & - 1d0, t1 , size(t1,1), & - v_vvov, size(v_vvov,1), & - 1d0, K1 , size(K1,1)) + double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) +! call dgemm('N','N',nO,nV*nO*nV,nV, & +! 1d0, t1 , size(t1,1), & +! v_vvov, size(v_vvov,1), & +! 1d0, K1 , size(K1,1)) + + call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & + t1v, cholesky_ao_num*nO) + + call dgemm('T','N', nO*nO, nV*nV, cholesky_ao_num, 1.d0, & + t1v, cholesky_ao_num, cc_space_v_vv_chol, cholesky_ao_num, 0.d0, & + K1tmp, nO*nO) + + deallocate(t1v) ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) call dgemm('N','N',nV*nO,nO*nV,nV*nO, & 1d0, Y, size(Y,1) * size(Y,2), & @@ -1421,7 +1433,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) 0d0, Z, size(Z,1) * size(Z,2)) !$omp parallel & - !$omp shared(nO,nV,K1,Z) & + !$omp shared(nO,nV,K1,Z,K1tmp) & !$omp private(i,beta,a,u) & !$omp default(none) !$omp do @@ -1429,7 +1441,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) do i = 1, nO do a = 1, nV do u = 1, nO - K1(u,a,i,beta) = K1(u,a,i,beta) + Z(u,beta,a,i) + K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) enddo enddo enddo @@ -1437,6 +1449,6 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) !$omp end do !$omp end parallel - deallocate(X,Y,Z) + deallocate(K1tmp,X,Y,Z) end From b3b080929b38c152ac1bd868bad4dd311108e7e9 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 11 Jul 2023 01:56:28 +0200 Subject: [PATCH 228/337] fixed stupid bug in TC 1-RDM and one-e gradient: o-v, o-a are ok --- src/casscf_tc_bi/grad_dm.irp.f | 28 ++++++++++++++++------------ src/casscf_tc_bi/grad_old.irp.f | 21 +++++++++++++++++---- src/tc_bi_ortho/tc_prop.irp.f | 8 +++++--- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 0fc2e4eb..7f6155ab 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -11,8 +11,7 @@ do t=1,n_act_orb tt=list_act(t) indx = mat_idx_c_a(i,t) - call gradvec_tc_it(ii,tt,res_l) - call gradvec_tc_it(tt,ii,res_r) + call gradvec_tc_it(ii,tt,res_l,res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) gradvec_tc_r(fff,indx)=res_r(fff) @@ -56,23 +55,28 @@ subroutine gradvec_tc_ia(i,a,res) end -subroutine gradvec_tc_it(i,t,res) +subroutine gradvec_tc_it(i,t,res_l, res_r) implicit none BEGIN_DOC ! doubly occupied --> active TC gradient ! -! Corresponds to +! Corresponds to res_r = +! +! res_l = END_DOC integer, intent(in) :: i,t - double precision, intent(out) :: res(0:3) - integer :: rr,r,ss,s + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,ss,s,m double precision :: dm - res = 0.d0 - res(1) = -2 * mo_bi_ortho_tc_one_e(i,t) - do rr = 1, n_act_orb - r = list_act(rr) - dm = tc_transition_matrix_mo(t,r,1,1) - res(1) += mo_bi_ortho_tc_one_e(i,r) * dm + res_r = 0.d0 + do m = 1, mo_num + res_r(1) += mo_bi_ortho_tc_one_e(i,m) * tc_transition_matrix_mo(t,m,1,1) & + -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,i,1,1) + enddo + res_l = 0.d0 + do m = 1, mo_num + res_l(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(i,m,1,1) & + -mo_bi_ortho_tc_one_e(m,i) * tc_transition_matrix_mo(m,t,1,1) enddo end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index 6610dee3..ea6747b1 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -25,6 +25,19 @@ enddo enddo enddo + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do tt = 1, n_act_orb + ipart = list_act(tt) + indx = mat_idx_c_a(ii,tt) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo ! do indx=1,nMonoEx ! ihole=excit(1,indx) ! ipart=excit(2,indx) @@ -57,7 +70,7 @@ END_PROVIDER subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) BEGIN_DOC ! eq 18 of Siegbahn et al, Physica Scripta 1980 - ! we calculate res_l = , and res_r = + ! we calculate res_r = , and res_r = ! q=hole, p=particle ! res_l(0) = total matrix element ! res_l(1) = one-electron part @@ -89,12 +102,12 @@ subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) if (ierr.eq.1) then call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & - ,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + ,N_det,psi_det_size,N_states,i_H_chi_array,i_H_phi_array) ! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) do istate=1,N_states do ll = 0,3 - res_l(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase - res_r(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + res_l(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + res_r(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase enddo end do end if diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f index 5bb0e2c0..a13dc9a2 100644 --- a/src/tc_bi_ortho/tc_prop.irp.f +++ b/src/tc_bi_ortho/tc_prop.irp.f @@ -29,7 +29,7 @@ tc_transition_matrix_mo_alpha(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo do p = 1, n_occ_ab(2) ! browsing the beta electrons - m = occ(p,1) + m = occ(p,2) tc_transition_matrix_mo_beta(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo else @@ -38,12 +38,14 @@ ! Single alpha h = exc(1,1,1) ! hole in psi_det(1,1,j) p = exc(1,2,1) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) else ! Single beta h = exc(1,1,2) ! hole in psi_det(1,1,j) p = exc(1,2,2) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_beta(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_beta(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) endif endif enddo From 64ee4eab75165e4fa283cdf03393c7e93d29f66c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 15:13:01 +0200 Subject: [PATCH 229/337] Removed all vvv in CCSD --- src/ccsd/ccsd_space_orb_sub.irp.f | 4 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 43 ++-- src/utils_cc/mo_integrals_cc.irp.f | 323 ++++++++++++++++++++++++- 3 files changed, 335 insertions(+), 35 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 35e14313..e7b115bb 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -92,7 +92,7 @@ subroutine run_ccsd_space_orb call compute_H_vv_chol(nO,nV,tau_x,H_vv) call compute_H_vo_chol(nO,nV,t1,H_vo) - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) else call compute_H_oo(nO,nV,t1,t2,tau,H_oo) @@ -588,8 +588,6 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(W_vvov,T_vvoo) - - ! r1(u,beta) = r1(u,beta) - (2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)) * tau(i,j,a,beta) ! r1(u,beta) = r1(u,beta) - W(i,j,a,u) * tau(i,j,a,beta) !do beta = 1, nV diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index b804792f..99a4e426 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -186,14 +186,13 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(X_ovov) integer :: iblock, block_size, nVmax - double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - block_size = 8 - allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) + double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) + block_size = 16 + allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & - !$omp private(b,beta,i,a) & - !$omp default(none) + !$omp private(u,i,b,a) & + !$omp default(shared) !$omp do do u = 1, nO do i = 1, nO @@ -204,26 +203,32 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) enddo enddo enddo - !$omp end do nowait + !$omp end do !$omp end parallel do iblock = 1, nV, block_size nVmax = min(block_size,nV-iblock+1) + + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol , cholesky_ao_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + 0.d0, W_vvov_tmp, nV*nO) + !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & !$omp private(b,i,a,beta) & - !$omp default(none) - !$omp do collapse(2) - do beta = iblock, iblock + nVmax - 1 + !$omp default(shared) + do beta = 1, nVmax do i = 1, nO + !$omp do do b = 1, nV do a = 1, nV - W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) enddo enddo + !$omp end do nowait enddo enddo - !$omp end do nowait + !$omp barrier !$omp end parallel call dgemm('T','N',nO,nVmax,nO*nV*nV, & @@ -234,6 +239,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) deallocate(W_vvov,T_vvoo) + double precision, allocatable :: W_oovo(:,:,:,:) allocate(W_oovo(nO,nO,nV,nO)) @@ -462,7 +468,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & cc_space_v_vvoo,J1) call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,cc_space_v_vvov,K1) + cc_space_v_ovov,K1) ! Residual !r2 = 0d0 @@ -1346,7 +1352,7 @@ end ! K1 -subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) implicit none @@ -1354,7 +1360,7 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) double precision, intent(in) :: t1(nO, nV) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) - double precision, intent(in) :: v_vvov(nV,nV,nO,nV), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) double precision, intent(out) :: K1(nO, nV, nO, nV) double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) @@ -1412,11 +1418,6 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,v_vvov,K1) double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) -! call dgemm('N','N',nO,nV*nO*nV,nV, & -! 1d0, t1 , size(t1,1), & -! v_vvov, size(v_vvov,1), & -! 1d0, K1 , size(K1,1)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & t1v, cholesky_ao_num*nO) diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index 62237229..a68ab8de 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -190,7 +190,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oooo,1) + n2 = size(cc_space_v_oooo,2) + n3 = size(cc_space_v_oooo,3) + n4 = size(cc_space_v_oooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_oo_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + endif END_PROVIDER @@ -200,7 +233,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vooo,1) + n2 = size(cc_space_v_vooo,2) + n3 = size(cc_space_v_vooo,3) + n4 = size(cc_space_v_vooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + endif END_PROVIDER @@ -210,7 +276,32 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovoo,1) + n2 = size(cc_space_v_ovoo,2) + n3 = size(cc_space_v_ovoo,3) + n4 = size(cc_space_v_ovoo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovoo(i1,i2,i3,i4) = cc_space_v_vooo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + endif END_PROVIDER @@ -220,7 +311,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovo(i1,i2,i3,i4) = cc_space_v_vooo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + endif END_PROVIDER @@ -230,7 +345,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ooov(i1,i2,i3,i4) = cc_space_v_ovoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + endif END_PROVIDER @@ -240,7 +379,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vvoo,1) + n2 = size(cc_space_v_vvoo,2) + n3 = size(cc_space_v_vvoo,3) + n4 = size(cc_space_v_vvoo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vo_chol, cholesky_ao_num, & + cc_space_v_vo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vvoo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + endif END_PROVIDER @@ -250,7 +422,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vovo,1) + n2 = size(cc_space_v_vovo,2) + n3 = size(cc_space_v_vovo,3) + n4 = size(cc_space_v_vovo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & + cc_space_v_vv_chol, cholesky_ao_num, & + cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vovo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + endif END_PROVIDER @@ -260,7 +465,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_voov,1) + n2 = size(cc_space_v_voov,2) + n3 = size(cc_space_v_voov,3) + n4 = size(cc_space_v_voov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_voov(i1,i2,i3,i4) = cc_space_v_vvoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + endif END_PROVIDER @@ -270,7 +499,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovvo,1) + n2 = size(cc_space_v_ovvo,2) + n3 = size(cc_space_v_ovvo,3) + n4 = size(cc_space_v_ovvo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovvo(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + endif END_PROVIDER @@ -280,7 +533,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovov,1) + n2 = size(cc_space_v_ovov,2) + n3 = size(cc_space_v_ovov,3) + n4 = size(cc_space_v_ovov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovov(i1,i2,i3,i4) = cc_space_v_vovo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + endif END_PROVIDER @@ -290,7 +567,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovv,1) + n2 = size(cc_space_v_oovv,2) + n3 = size(cc_space_v_oovv,3) + n4 = size(cc_space_v_oovv,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovv(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i4,i1,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + endif END_PROVIDER From 8c65e01eedebcf164e16ea78097d35ee42ca0b7e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 17:31:58 +0200 Subject: [PATCH 230/337] I/O in Cholesky --- src/ao_two_e_ints/EZFIO.cfg | 6 + src/ao_two_e_ints/cholesky.irp.f | 836 +++++++++++++------------ src/ccsd/ccsd_space_orb_sub.irp.f | 35 +- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 154 +++-- 4 files changed, 568 insertions(+), 463 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 9f523fca..9c017813 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,12 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[io_ao_cholesky] +type: Disk_access +doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [ao_integrals_threshold] type: Threshold doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 128aa483..8b969174 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -14,412 +14,438 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, END_PROVIDER -BEGIN_PROVIDER [ integer, cholesky_ao_num ] + BEGIN_PROVIDER [ integer, cholesky_ao_num ] &BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - ! - ! Last dimension of cholesky_ao is cholesky_ao_num - END_DOC - - integer :: rank, ndim - double precision :: tau - double precision, pointer :: L(:,:), L_old(:,:) - - - double precision :: s - double precision, parameter :: dscale = 1.d0 - - double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer, allocatable :: Lset_rev(:), Dset_rev(:) - - integer :: i,j,k,m,p,q, qj, dj, p2, q2 - integer :: N, np, nq - - double precision :: Dmax, Dmin, Qmax, f - double precision, external :: get_ao_two_e_integral - logical, external :: ao_two_e_integral_zero - - double precision, external :: ao_two_e_integral - integer :: block_size, iblock, ierr - - integer(omp_lock_kind), allocatable :: lock(:) - - double precision :: rss - double precision, external :: memory_of_double, memory_of_int - - - PROVIDE nucl_coord - - if (.not.do_direct_integrals) then - PROVIDE ao_two_e_integrals_in_map - endif - deallocate(cholesky_ao) - - ndim = ao_num*ao_num - tau = ao_cholesky_threshold - - rss = 6.d0 * memory_of_double(ndim) + & - 6.d0 * memory_of_int(ndim) - call check_mem(rss, irp_here) - - allocate(L(ndim,1)) - - print *, '' - print *, 'Cholesky decomposition of AO integrals' - print *, '======================================' - print *, '' - print *, '============ =============' - print *, ' Rank Threshold' - print *, '============ =============' - - - rank = 0 - - allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) - allocate( addr(3,ndim) ) - do k=1,ndim - call omp_init_lock(lock(k)) - enddo - - ! 1. - k=0 - do j=1,ao_num - do i=1,ao_num - k = k+1 - addr(1,k) = i - addr(2,k) = j - addr(3,k) = (i-1)*ao_num + j - enddo - enddo - - if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) - do i=1,ndim - D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & - addr(1,i), addr(2,i)) - enddo - !$OMP END PARALLEL DO - else - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) - do i=1,ndim - D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & - addr(2,i), addr(2,i), & - ao_integrals_map) - enddo - !$OMP END PARALLEL DO - endif - - Dmax = maxval(D) - - ! 2. - np=0 - Lset_rev = 0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - Lset_rev(p) = np - endif - enddo - - ! 3. - N = 0 - - ! 4. - i = 0 - - ! 5. - do while ( (Dmax > tau).and.(rank < ndim) ) - ! a. - i = i+1 - - s = 0.1d0 - - ! Inrease s until the arrays fit in memory - do - - ! b. - Dmin = max(s*Dmax,tau) - - ! c. - nq=0 - LDmap = 0 - DLmap = 0 - Dset_rev = 0 - do p=1,np - if ( D(Lset(p)) > Dmin ) then - nq = nq+1 - Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p - endif - enddo - - call resident_memory(rss) - rss = rss & - + np*memory_of_double(nq) & ! Delta(np,nq) - + (rank+nq)* memory_of_double(ndim) & ! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) - ! Ltmp_q(nq,block_size) - - if (rss > qp_max_mem) then - s = s*2.d0 - else - exit - endif - - if ((s > 1.d0).or.(nq == 0)) then - print *, 'Not enough memory. Reduce cholesky threshold' - stop -1 - endif - - enddo - - ! d., e. - block_size = max(N,24) - - L_old => L - allocate(L(ndim,rank+nq), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' - stop -1 - endif - - !$OMP PARALLEL DO PRIVATE(k) - do k=1,rank - L(:,k) = L_old(:,k) - enddo - !$OMP END PARALLEL DO - - deallocate(L_old) - - allocate(Delta(np,nq), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Delta(np,nq))' - stop -1 - endif - - allocate(Ltmp_p(np,block_size), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' - stop -1 - endif - - allocate(Ltmp_q(nq,block_size), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' - stop -1 - endif - - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - - !$OMP DO - do q=1,nq - Delta(:,q) = 0.d0 - enddo - !$OMP ENDDO NOWAIT - - !$OMP DO - do k=1,N - do p=1,np - Ltmp_p(p,k) = L(Lset(p),k) - enddo - do q=1,nq - Ltmp_q(q,k) = L(Dset(q),k) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP BARRIER - - !$OMP DO SCHEDULE(guided) - do m=1,nq - - call omp_set_lock(lock(m)) - do k=1,np - ! Apply only to (k,m) pairs where k is not in Dset - if (LDmap(k) /= 0) cycle - q = Lset_rev(addr(3,Lset(k))) - if ((0 < q).and.(q < k)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)), & - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)), & - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)), & - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num + END_DOC + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + + + double precision :: s + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) + integer, allocatable :: Lset_rev(:), Dset_rev(:) + + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + double precision, external :: ao_two_e_integral + integer :: block_size, iblock, ierr + + integer(omp_lock_kind), allocatable :: lock(:) + + double precision :: rss + double precision, external :: memory_of_double, memory_of_int + + integer, external :: getUnitAndOpen + integer :: iunit + + ndim = ao_num*ao_num + deallocate(cholesky_ao) + + if (read_ao_cholesky) then + print *, 'Reading Cholesky vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') + read(iunit) rank + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + read(iunit) cholesky_ao + close(iunit) + cholesky_ao_num = rank + + else + + PROVIDE nucl_coord + + if (.not.do_direct_integrals) then + PROVIDE ao_two_e_integrals_in_map + endif + + tau = ao_cholesky_threshold + + rss = 6.d0 * memory_of_double(ndim) + & + 6.d0 * memory_of_int(ndim) + call check_mem(rss, irp_here) + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + + + rank = 0 + + allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) + allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) + allocate( addr(3,ndim) ) + do k=1,ndim + call omp_init_lock(lock(k)) + enddo + + ! 1. + k=0 + do j=1,ao_num + do i=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j + enddo + enddo + + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif + + Dmax = maxval(D) + + ! 2. + np=0 + Lset_rev = 0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + Lset_rev(p) = np + endif + enddo + + ! 3. + N = 0 + + ! 4. + i = 0 + + ! 5. + do while ( (Dmax > tau).and.(rank < ndim) ) + ! a. + i = i+1 + + s = 0.1d0 + + ! Inrease s until the arrays fit in memory + do while (.True.) + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + LDmap = 0 + DLmap = 0 + Dset_rev = 0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + Dset_rev(Dset(nq)) = nq + LDmap(p) = nq + DLmap(nq) = p endif - if (q /= 0) Delta(q,m) = Delta(k,m) - endif - enddo - - j = Dset_rev(addr(3,Dset(m))) - if ((0 < j).and.(j < m)) then - call omp_unset_lock(lock(m)) - cycle - endif - - if ((j /= m).and.(j /= 0)) then - call omp_set_lock(lock(j)) - endif - do k=1,nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)), & - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)), & - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(p,m) - if (j /= 0) Delta(p,j) = Delta(p,m) - if (q*j /= 0) Delta(q,j) = Delta(p,m) - endif - enddo - call omp_unset_lock(lock(m)) - if ((j /= m).and.(j /= 0)) then - call omp_unset_lock(lock(j)) - endif - enddo - !$OMP END DO - - !$OMP END PARALLEL - - if (N>0) then - call dgemm('N','T', np, nq, N, -1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - endif - - ! f. - Qmax = D(Dset(1)) - do q=1,nq - Qmax = max(Qmax, D(Dset(q))) - enddo - - ! g. - - iblock = 0 - do j=1,nq - - if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit - ! i. - rank = N+j - - if (iblock == block_size) then - call dgemm('N','T',np,nq,block_size,-1.d0, & - Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) - iblock = 0 - endif - - ! ii. - do dj=1,nq - qj = Dset(dj) - if (D(qj) == Qmax) then - exit - endif - enddo - - L(1:ndim, rank) = 0.d0 - - iblock = iblock+1 - do p=1,np - Ltmp_p(p,iblock) = Delta(p,dj) - enddo - - ! iv. - if (iblock > 1) then - call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0, & - Ltmp_p(1,iblock), 1) - endif - - ! iii. - f = 1.d0/dsqrt(Qmax) - - !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) - !$OMP DO - do p=1,np - Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f - L(Lset(p), rank) = Ltmp_p(p,iblock) - D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) - enddo - !$OMP END DO - - !$OMP DO - do q=1,nq - Ltmp_q(q,iblock) = L(Dset(q), rank) - enddo - !$OMP END DO - - !$OMP END PARALLEL - - Qmax = D(Dset(1)) - do q=1,nq - Qmax = max(Qmax, D(Dset(q))) - enddo - - enddo - - print '(I10, 4X, ES12.3)', rank, Qmax - - deallocate(Delta, stat=ierr) - deallocate(Ltmp_p, stat=ierr) - deallocate(Ltmp_q, stat=ierr) - - ! i. - N = rank - - ! j. - Dmax = D(Lset(1)) - do p=1,np - Dmax = max(Dmax, D(Lset(p))) - enddo - - np=0 - Lset_rev = 0 - do p=1,ndim - if ( dscale*dscale*Dmax*D(p) > tau*tau ) then - np = np+1 - Lset(np) = p - Lset_rev(p) = np - endif - enddo - - enddo - - do k=1,ndim - call omp_destroy_lock(lock(k)) - enddo - - allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) - if (ierr /= 0) then - print *, irp_here, ': Allocation failed' - stop -1 - endif - !$OMP PARALLEL DO PRIVATE(k) - do k=1,rank - call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) - enddo - !$OMP END PARALLEL DO - deallocate(L) - cholesky_ao_num = rank - - print *, '============ =============' - print *, '' - print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - print *, '' + enddo + + call resident_memory(rss) + rss = rss & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + ! Ltmp_q(nq,block_size) + + if (rss > qp_max_mem) then + s = s*2.d0 + else + exit + endif + + if ((s > 1.d0).or.(nq == 0)) then + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + + enddo + + ! d., e. + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + L(:,k) = L_old(:,k) + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif + + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + + !$OMP DO + do q=1,nq + Delta(:,q) = 0.d0 + enddo + !$OMP ENDDO NOWAIT + + !$OMP DO + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO SCHEDULE(guided) + do m=1,nq + + call omp_set_lock(lock(m)) + do k=1,np + ! Apply only to (k,m) pairs where k is not in Dset + if (LDmap(k) /= 0) cycle + q = Lset_rev(addr(3,Lset(k))) + if ((0 < q).and.(q < k)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + if (q /= 0) Delta(q,m) = Delta(k,m) + endif + enddo + + j = Dset_rev(addr(3,Dset(m))) + if ((0 < j).and.(j < m)) then + call omp_unset_lock(lock(m)) + cycle + endif + + if ((j /= m).and.(j /= 0)) then + call omp_set_lock(lock(j)) + endif + do k=1,nq + ! Apply only to (k,m) pairs both in Dset + p = DLmap(k) + q = Lset_rev(addr(3,Dset(k))) + if ((0 < q).and.(q < p)) cycle + if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)),& + addr(2,Dset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)),& + addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + if (q /= 0) Delta(q,m) = Delta(p,m) + if (j /= 0) Delta(p,j) = Delta(p,m) + if (q*j /= 0) Delta(q,j) = Delta(p,m) + endif + enddo + call omp_unset_lock(lock(m)) + if ((j /= m).and.(j /= 0)) then + call omp_unset_lock(lock(j)) + endif + enddo + !$OMP END DO + + !$OMP END PARALLEL + + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + ! g. + + iblock = 0 + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + ! i. + rank = N+j + + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo + + L(1:ndim, rank) = 0.d0 + + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& + Ltmp_p(1,iblock), 1) + endif + + ! iii. + f = 1.d0/dsqrt(Qmax) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP DO + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) + enddo + !$OMP END DO + + !$OMP DO + do q=1,nq + Ltmp_q(q,iblock) = L(Dset(q), rank) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + enddo + + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(Delta, stat=ierr) + deallocate(Ltmp_p, stat=ierr) + deallocate(Ltmp_q, stat=ierr) + + ! i. + N = rank + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo + + np=0 + Lset_rev = 0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + Lset_rev(p) = np + endif + enddo + + enddo + + do k=1,ndim + call omp_destroy_lock(lock(k)) + enddo + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + + if (write_ao_cholesky) then + print *, 'Writing Cholesky vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') + write(iunit) rank + write(iunit) cholesky_ao + close(iunit) + call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read') + endif + endif + + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + END_PROVIDER - + diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index e7b115bb..f97514cd 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -49,9 +49,34 @@ subroutine run_ccsd_space_orb allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) - all_err = 0d0 - all_t = 0d0 + double precision :: rss, diis_mem, extra_mem + double precision, external :: memory_of_double + diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV) + call resident_memory(rss) + do while (cc_diis_depth > 1) + if (rss + diis_mem * cc_diis_depth > qp_max_mem) then + cc_diis_depth = cc_diis_depth - 1 + else + exit + endif + end do + if (cc_diis_depth <= 1) then + print *, 'Not enough memory for DIIS' + stop -1 + endif + print *, 'DIIS size ', cc_diis_depth + + allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth)) + !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED) + do j=1,cc_diis_depth + !$OMP DO + do i=1, size(all_err,1) + all_err(i,j) = 0d0 + all_t(i,j) = 0d0 + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL endif if (elec_alpha_num /= elec_beta_num) then @@ -1427,7 +1452,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -1447,7 +1472,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 99a4e426..1c56996e 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -454,21 +454,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 ! internal - double precision, allocatable :: g_occ(:,:), g_vir(:,:), J1(:,:,:,:), K1(:,:,:,:) - double precision, allocatable :: A1(:,:,:,:) integer :: u,v,i,j,beta,gam,a,b - - allocate(g_occ(nO,nO), g_vir(nV,nV)) - allocate(J1(nO,nV,nV,nO), K1(nO,nV,nO,nV)) - allocate(A1(nO,nO,nO,nO)) - - call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) - call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) - call compute_A1_chol(nO,nV,t1,t2,tau,A1) - call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & - cc_space_v_vvoo,J1) - call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & - cc_space_v_ovov,K1) + double precision :: max_r2_local ! Residual !r2 = 0d0 @@ -490,36 +477,47 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: A1(:,:,:,:) + allocate(A1(nO,nO,nO,nO)) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) call dgemm('N','N',nO*nO,nV*nV,nO*nO, & 1d0, A1, size(A1,1) * size(A1,2), & tau, size(tau,1) * size(tau,2), & 1d0, r2, size(r2,1) * size(r2,2)) + deallocate(A1) integer :: block_size, iblock, k block_size = 16 double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 + double precision, dimension(:,:), allocatable :: tmp_cc2 allocate(tmp_cc(cholesky_ao_num,nV,nV)) call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) - !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV)) + call set_multiple_levels_omp(.False.) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_ao_num,nV)) !$OMP DO do gam = 1, nV do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & - -1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - tmp_cc(1,1,gam), cholesky_ao_num, 0.d0, tmpB1, nV*block_size) call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, tmpB1, nV*block_size) + cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, & + 0.d0, tmpB1, nV*block_size) + + do a=1,nV + do k=1,cholesky_ao_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, 1.d0, & - tmpB1, nV*block_size) + tmp_cc2, cholesky_ao_num, & + 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) do b = 1, nV @@ -538,15 +536,14 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$OMP ENDDO - deallocate(B1, tmpB1) + deallocate(B1, tmpB1, tmp_cc2) !$OMP END PARALLEL deallocate(tmp_cc) - double precision, allocatable :: X_oovv(:,:,:,:),Y_oovv(:,:,:,:) - allocate(X_oovv(nO,nO,nV,nV),Y_oovv(nO,nO,nV,nV)) - + double precision, allocatable :: X_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV)) !$omp parallel & !$omp shared(nO,nV,t2,X_oovv) & !$omp private(u,v,gam,a) & @@ -564,10 +561,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: g_vir(:,:) + allocate(g_vir(nV,nV)) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + double precision, allocatable :: Y_oovv(:,:,:,:) + allocate(Y_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO*nO*nV,nV,nV, & 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & g_vir, size(g_vir,1), & 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + deallocate(g_vir) + deallocate(X_oovv) !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -585,11 +591,18 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Y_oovv) + double precision, allocatable :: g_occ(:,:) + allocate(g_occ(nO,nO)) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO,nO*nV*nV,nO, & 1d0, g_occ , size(g_occ,1), & t2 , size(t2,1), & 0d0, X_oovv, size(X_oovv,1)) + deallocate(g_occ) !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -613,6 +626,8 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: X_vovv(:,:,:,:) allocate(X_vovv(nV,nO,nV,block_size)) + allocate(Y_oovv(nO,nO,nV,nV)) + do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & @@ -626,6 +641,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) enddo + deallocate(X_vovv) !$omp parallel & !$omp shared(nO,nV,r2,Y_oovv) & @@ -643,6 +659,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Y_oovv) double precision, allocatable :: X_ovvo(:,:,:,:) double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) @@ -693,6 +710,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) deallocate(X_ovvo) !----- + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV,nV,nO, & @@ -716,9 +734,10 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(X_oovv) double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) - allocate(X_vovo(nV,nO,nV,nO), Y_oovo(nO,nO,nV,nO)) + allocate(X_vovo(nV,nO,nV,nO)) !$omp parallel & !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & @@ -737,15 +756,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end parallel + allocate(Y_oovo(nO,nO,nV,nO)) call dgemm('N','N',nO,nO*nV*nO,nV, & 1d0, t1, size(t1,1), & X_vovo, size(X_vovo,1), & 0d0, Y_oovo, size(Y_oovo,1)) + deallocate(X_vovo) + allocate(X_oovv(nO,nO,nV,nV)) call dgemm('N','N',nO*nO*nV, nV, nO, & 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & t1 , size(t1,1), & 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + deallocate(Y_oovo) !$omp parallel & !$omp shared(nO,nV,r2,X_oovv) & @@ -763,15 +786,23 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(X_oovv) - deallocate(X_vovo,Y_oovo) - double precision, allocatable :: Y_voov(:,:,:,:), Z_ovov(:,:,:,:) - allocate(X_ovvo(nO,nV,nV,nO), Y_voov(nV,nO,nO,nV),Z_ovov(nO,nV,nO,nV)) + double precision, allocatable :: J1(:,:,:,:) + allocate(J1(nO,nV,nV,nO)) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvoo,J1) + + double precision, allocatable :: K1(:,:,:,:) + allocate(K1(nO,nV,nO,nV)) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,K1) + + allocate(X_ovvo(nO,nV,nV,nO)) !$omp parallel & - !$omp shared(nO,nV,X_ovvo,Y_voov,K1,J1,t2) & !$omp private(u,v,gam,beta,i,a) & - !$omp default(none) + !$omp default(shared) do i = 1, nO !$omp do do a = 1, nV @@ -783,7 +814,15 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do nowait enddo + !$omp end parallel + deallocate(J1) + double precision, allocatable :: Y_voov(:,:,:,:) + allocate(Y_voov(nV,nO,nO,nV)) + + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) !$omp do do gam = 1, nV do v = 1, nO @@ -797,11 +836,16 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + double precision, allocatable :: Z_ovov(:,:,:,:) + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & Y_voov, size(Y_voov,1) * size(Y_voov,2), & 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + deallocate(X_ovvo,Y_voov) + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -819,10 +863,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_ovvo,Y_voov) + deallocate(Z_ovov) double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) - allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + allocate(X_ovov(nO,nV,nO,nV)) + allocate(Y_ovov(nO,nV,nO,nV)) !$omp parallel & !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & @@ -853,10 +898,12 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + allocate(Z_ovov(nO,nV,nO,nV)) call dgemm('T','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov, Y_ovov) !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & @@ -874,9 +921,11 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) enddo !$omp end do !$omp end parallel + deallocate(Z_ovov) + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -896,7 +945,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo @@ -904,11 +953,16 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel + deallocate(K1) + + allocate(Z_ovov(nO,nV,nO,nV)) call dgemm('N','N',nO*nV,nO*nV,nO*nV, & 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov,Y_ovov) + !$omp parallel & !$omp shared(nO,nV,r2,Z_ovov) & !$omp private(u,v,gam,beta) & @@ -926,39 +980,33 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !$omp end do !$omp end parallel - deallocate(X_ovov,Y_ovov,Z_ovov) + deallocate(Z_ovov) ! Change the sign for consistency with the code in spin orbitals + + max_r2 = 0d0 !$omp parallel & - !$omp shared(nO,nV,r2) & - !$omp private(i,j,a,b) & + !$omp shared(nO,nV,r2,max_r2) & + !$omp private(i,j,a,b,max_r2_local) & !$omp default(none) + max_r2_local = 0.d0 !$omp do do b = 1, nV do a = 1, nV do j = 1, nO do i = 1, nO r2(i,j,a,b) = -r2(i,j,a,b) + max_r2_local = max(r2(i,j,a,b), max_r2_local) enddo enddo enddo enddo - !$omp end do + !$omp end do nowait + !$omp critical + max_r2 = max(max_r2, max_r2_local) + !$omp end critical !$omp end parallel - max_r2 = 0d0 - do b = 1, nV - do a = 1, nV - do j = 1, nO - do i = 1, nO - max_r2 = max(r2(i,j,a,b), max_r2) - enddo - enddo - enddo - enddo - - deallocate(g_occ,g_vir,J1,K1,A1) - end ! A1 From 9e833cc47627e819e60e05faace5fabb3540f760 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 22:17:40 +0200 Subject: [PATCH 231/337] Improve memory control --- src/ao_two_e_ints/cholesky.irp.f | 35 +++++++++++++++++++++----------- src/utils/memory.irp.f | 13 ++++++------ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 8b969174..4bf60847 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -48,7 +48,7 @@ END_PROVIDER integer(omp_lock_kind), allocatable :: lock(:) - double precision :: rss + double precision :: mem double precision, external :: memory_of_double, memory_of_int integer, external :: getUnitAndOpen @@ -70,16 +70,22 @@ END_PROVIDER PROVIDE nucl_coord - if (.not.do_direct_integrals) then + if (do_direct_integrals) then + if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then + ! Trigger providers inside ao_two_e_integral + continue + endif + else PROVIDE ao_two_e_integrals_in_map endif tau = ao_cholesky_threshold - rss = 6.d0 * memory_of_double(ndim) + & - 6.d0 * memory_of_int(ndim) - call check_mem(rss, irp_here) + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + call check_mem(mem, irp_here) + call print_memory_usage() + allocate(L(ndim,1)) print *, '' @@ -112,7 +118,7 @@ END_PROVIDER enddo if (do_direct_integrals) then - !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & addr(1,i), addr(2,i)) @@ -175,20 +181,20 @@ END_PROVIDER endif enddo - call resident_memory(rss) - rss = rss & + call total_memory(mem) + mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) - + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) - ! Ltmp_q(nq,block_size) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - if (rss > qp_max_mem) then + if (mem > qp_max_mem) then s = s*2.d0 else exit endif if ((s > 1.d0).or.(nq == 0)) then + call print_memory_usage() print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif @@ -201,6 +207,7 @@ END_PROVIDER L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif @@ -215,18 +222,21 @@ END_PROVIDER allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 endif allocate(Ltmp_p(np,block_size), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' stop -1 endif allocate(Ltmp_q(nq,block_size), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' stop -1 endif @@ -253,7 +263,7 @@ END_PROVIDER !$OMP BARRIER - !$OMP DO SCHEDULE(guided) + !$OMP DO SCHEDULE(dynamic) do m=1,nq call omp_set_lock(lock(m)) @@ -419,6 +429,7 @@ END_PROVIDER allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then + call print_memory_usage() print *, irp_here, ': Allocation failed' stop -1 endif diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 7da283ec..41ec0428 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -99,16 +99,15 @@ subroutine check_mem(rss_in,routine) END_DOC double precision, intent(in) :: rss_in character*(*) :: routine - double precision :: rss - !$OMP CRITICAL - call resident_memory(rss) - rss += rss_in - if (int(rss)+1 > qp_max_mem) then + double precision :: mem + call total_memory(mem) + mem += rss_in + if (mem > qp_max_mem) then + call print_memory_usage() print *, 'Not enough memory: aborting in ', routine - print *, int(rss)+1, ' GB required' + print *, mem, ' GB required' stop -1 endif - !$OMP END CRITICAL end subroutine print_memory_usage() From 349f956e1cd8c6af519718b3043d4f8fd26b7f4f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 22:31:51 +0200 Subject: [PATCH 232/337] Super fast cholesky --- src/ao_two_e_ints/cholesky.irp.f | 92 +++++++++++--------------------- 1 file changed, 30 insertions(+), 62 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 4bf60847..7d02d27f 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -35,6 +35,7 @@ END_PROVIDER double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) integer, allocatable :: Lset_rev(:), Dset_rev(:) + logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, qj, dj, p2, q2 integer :: N, np, nq @@ -158,7 +159,7 @@ END_PROVIDER ! a. i = i+1 - s = 0.1d0 + s = 0.01d0 ! Inrease s until the arrays fit in memory do while (.True.) @@ -242,11 +243,14 @@ END_PROVIDER endif + allocate(computed(nq)) + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) !$OMP DO do q=1,nq Delta(:,q) = 0.d0 + computed(q) = .False. enddo !$OMP ENDDO NOWAIT @@ -262,64 +266,6 @@ END_PROVIDER !$OMP END DO NOWAIT !$OMP BARRIER - - !$OMP DO SCHEDULE(dynamic) - do m=1,nq - - call omp_set_lock(lock(m)) - do k=1,np - ! Apply only to (k,m) pairs where k is not in Dset - if (LDmap(k) /= 0) cycle - q = Lset_rev(addr(3,Lset(k))) - if ((0 < q).and.(q < k)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(k,m) = ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(k,m) = get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& - addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(k,m) - endif - enddo - - j = Dset_rev(addr(3,Dset(m))) - if ((0 < j).and.(j < m)) then - call omp_unset_lock(lock(m)) - cycle - endif - - if ((j /= m).and.(j /= 0)) then - call omp_set_lock(lock(j)) - endif - do k=1,nq - ! Apply only to (k,m) pairs both in Dset - p = DLmap(k) - q = Lset_rev(addr(3,Dset(k))) - if ((0 < q).and.(q < p)) cycle - if (.not.ao_two_e_integral_zero( addr(1,Dset(k)), addr(1,Dset(m)),& - addr(2,Dset(k)), addr(2,Dset(m)) ) ) then - if (do_direct_integrals) then - Delta(p,m) = ao_two_e_integral(addr(1,Dset(k)), addr(2,Dset(k)),& - addr(1,Dset(m)), addr(2,Dset(m))) - else - Delta(p,m) = get_ao_two_e_integral( addr(1,Dset(k)), addr(1,Dset(m)),& - addr(2,Dset(k)), addr(2,Dset(m)), ao_integrals_map) - endif - if (q /= 0) Delta(q,m) = Delta(p,m) - if (j /= 0) Delta(p,j) = Delta(p,m) - if (q*j /= 0) Delta(q,j) = Delta(p,m) - endif - enddo - call omp_unset_lock(lock(m)) - if ((j /= m).and.(j /= 0)) then - call omp_unset_lock(lock(j)) - endif - enddo - !$OMP END DO - !$OMP END PARALLEL if (N>0) then @@ -358,6 +304,27 @@ END_PROVIDER L(1:ndim, rank) = 0.d0 + if (.not.computed(dj)) then + m = dj + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + endif + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. + endif + iblock = iblock+1 do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) @@ -398,9 +365,10 @@ END_PROVIDER print '(I10, 4X, ES12.3)', rank, Qmax - deallocate(Delta, stat=ierr) - deallocate(Ltmp_p, stat=ierr) - deallocate(Ltmp_q, stat=ierr) + deallocate(computed) + deallocate(Delta) + deallocate(Ltmp_p) + deallocate(Ltmp_q) ! i. N = rank From 1e390d83574392887c6e3890b9f860c98cd66904 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 11 Jul 2023 23:50:31 +0200 Subject: [PATCH 233/337] Reduce memory --- src/ao_two_e_ints/cholesky.irp.f | 25 ++----------------------- src/ccsd/ccsd_space_orb_sub.irp.f | 7 +++++++ 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 7d02d27f..175ccf6e 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -33,8 +33,7 @@ END_PROVIDER double precision, parameter :: dscale = 1.d0 double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) - integer, allocatable :: Lset(:), Dset(:), addr(:,:), LDmap(:), DLmap(:) - integer, allocatable :: Lset_rev(:), Dset_rev(:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) logical, allocatable :: computed(:) integer :: i,j,k,m,p,q, qj, dj, p2, q2 @@ -47,8 +46,6 @@ END_PROVIDER double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr - integer(omp_lock_kind), allocatable :: lock(:) - double precision :: mem double precision, external :: memory_of_double, memory_of_int @@ -100,12 +97,8 @@ END_PROVIDER rank = 0 - allocate( D(ndim), Lset(ndim), LDmap(ndim), DLmap(ndim), Dset(ndim) ) - allocate( Lset_rev(ndim), Dset_rev(ndim), lock(ndim) ) + allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) - do k=1,ndim - call omp_init_lock(lock(k)) - enddo ! 1. k=0 @@ -139,12 +132,10 @@ END_PROVIDER ! 2. np=0 - Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p - Lset_rev(p) = np endif enddo @@ -169,16 +160,10 @@ END_PROVIDER ! c. nq=0 - LDmap = 0 - DLmap = 0 - Dset_rev = 0 do p=1,np if ( D(Lset(p)) > Dmin ) then nq = nq+1 Dset(nq) = Lset(p) - Dset_rev(Dset(nq)) = nq - LDmap(p) = nq - DLmap(nq) = p endif enddo @@ -380,21 +365,15 @@ END_PROVIDER enddo np=0 - Lset_rev = 0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then np = np+1 Lset(np) = p - Lset_rev(p) = np endif enddo enddo - do k=1,ndim - call omp_destroy_lock(lock(k)) - enddo - allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then call print_memory_usage() diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index f97514cd..b48ca7da 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -18,6 +18,13 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE mo_two_e_integrals_in_map + endif + det = psi_det(:,:,cc_ref) print*,'Reference determinant:' call print_det(det,N_int) From 3c89e9d88d21d6ea171889989008172b53262e67 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 11:50:34 +0200 Subject: [PATCH 234/337] Fixed qp set_file --- etc/qp.rc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/etc/qp.rc b/etc/qp.rc index 9eec4570..c485abea 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,19 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + # Array to store directory names + dirs=() + + # Find directories containing "ezfio/.version" file recursively + for i in $(find . -name ezfio | sed 's/ezfio$/.version/') + do + dir_name=${i%/.version} # Remove the ".version" suffix + dir_name=${dir_name#./} # Remove the leading "./" if present + dirs+=("$dir_name") + done + + # Output the directory names for completion + COMPREPLY=("${dirs[@]/#/.\/}") # Prefix each directory name with "./" return 0 ;; plugins) From 0aed20f53a68c0225d05b7917da351926c2234e0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 12:04:42 +0200 Subject: [PATCH 235/337] Fixed previous commit --- etc/qp.rc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/etc/qp.rc b/etc/qp.rc index c485abea..d316faf5 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -189,18 +189,17 @@ _qp_Complete() esac;; set_file) # Array to store directory names - dirs=() + dirs="" # Find directories containing "ezfio/.version" file recursively for i in $(find . -name ezfio | sed 's/ezfio$/.version/') do dir_name=${i%/.version} # Remove the ".version" suffix - dir_name=${dir_name#./} # Remove the leading "./" if present - dirs+=("$dir_name") + dir_name=${dir_name#./} # Remove the leading "./" + dirs+="./$dir_name " done - # Output the directory names for completion - COMPREPLY=("${dirs[@]/#/.\/}") # Prefix each directory name with "./" + COMPREPLY=( $(compgen -W "$dirs" -- ${cur} ) ) return 0 ;; plugins) From d4574f24d981d793a6038039a4b19af6733fe7a3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 12:34:48 +0200 Subject: [PATCH 236/337] Reduced memory in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 154 +++++++++++----------- src/mo_two_e_ints/cholesky.irp.f | 24 ++-- src/mo_two_e_ints/integrals_3_index.irp.f | 14 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 16 +-- src/utils_cc/mo_integrals_cc.irp.f | 62 ++++----- 5 files changed, 139 insertions(+), 131 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 1c56996e..5969928a 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -209,9 +209,9 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do iblock = 1, nV, block_size nVmax = min(block_size,nV-iblock+1) - call dgemm('T','N', nV*nO, nV*nVmax, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol , cholesky_ao_num, & - cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol , cholesky_mo_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & 0.d0, W_vvov_tmp, nV*nO) !$omp parallel & @@ -304,7 +304,7 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) - allocate(tau_kau(cholesky_ao_num,nV,nO)) + allocate(tau_kau(cholesky_mo_num,nV,nO)) !$omp parallel & !$omp default(shared) & !$omp private(i,u,j,k,a,b,tmp_vov) @@ -318,9 +318,9 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) enddo enddo enddo - call dgemm('N','T',cholesky_ao_num,nV,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, tmp_vov, nV, & - 0.d0, tau_kau(1,1,u), cholesky_ao_num) + call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_mo_num) enddo !$omp end do nowait deallocate(tmp_vov) @@ -333,8 +333,8 @@ subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) !$omp end do nowait !$omp barrier !$omp end parallel - call dgemm('T', 'N', nO, nO, cholesky_ao_num*nV, 1.d0, & - tau_kau, cholesky_ao_num*nV, cc_space_v_vo_chol, cholesky_ao_num*nV, & + call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & 1.d0, H_oo, nO) end @@ -353,7 +353,7 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) - allocate(tau_kia(cholesky_ao_num,nO,nV)) + allocate(tau_kia(cholesky_mo_num,nO,nV)) !$omp parallel & !$omp default(shared) & !$omp private(i,beta,j,k,a,b,tmp_oov) @@ -367,9 +367,9 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) enddo enddo enddo - call dgemm('N','T',cholesky_ao_num,nO,nO*nV,1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, tmp_oov, nO, & - 0.d0, tau_kia(1,1,a), cholesky_ao_num) + call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_mo_num) enddo !$omp end do nowait deallocate(tmp_oov) @@ -383,8 +383,8 @@ subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) !$omp end do nowait !$omp barrier !$omp end parallel - call dgemm('T', 'N', nV, nV, cholesky_ao_num*nO, -1.d0, & - tau_kia, cholesky_ao_num*nO, cc_space_v_ov_chol, cholesky_ao_num*nO, & + call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & 1.d0, H_vv, nV) end @@ -407,33 +407,33 @@ subroutine compute_H_vo_chol(nO,nV,t1,H_vo) enddo enddo - allocate(tmp_k(cholesky_ao_num)) - call dgemm('N', 'N', cholesky_ao_num, 1, nO*nV, 2.d0, & - cc_space_v_ov_chol, cholesky_ao_num, & - t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - call dgemm('T','N',nV*nO,1,cholesky_ao_num,1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & H_vo, nV*nO) deallocate(tmp_k) - allocate(tmp(cholesky_ao_num,nO,nO)) - allocate(tmp2(cholesky_ao_num,nO,nO)) + allocate(tmp(cholesky_mo_num,nO,nO)) + allocate(tmp2(cholesky_mo_num,nO,nO)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, tmp, cholesky_ao_num*nO) + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) do i=1,nO do j=1,nO - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp2(k,j,i) = tmp(k,i,j) enddo enddo enddo deallocate(tmp) - call dgemm('T','N', nV, nO, cholesky_ao_num*nO, -1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, tmp2, cholesky_ao_num*nO, & + call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & 1.d0, H_vo, nV) end @@ -491,32 +491,32 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 double precision, dimension(:,:), allocatable :: tmp_cc2 - allocate(tmp_cc(cholesky_ao_num,nV,nV)) - call dgemm('N','N', cholesky_ao_num*nV, nV, nO, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_ao_num*nV) + allocate(tmp_cc(cholesky_mo_num,nV,nV)) + call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) call set_multiple_levels_omp(.False.) !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) - allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_ao_num,nV)) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV do iblock = 1, nV, block_size - call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, & - -1.d0, tmp_cc(1,1,iblock), cholesky_ao_num, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, & + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) do a=1,nV - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) enddo enddo - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol(1,1,iblock), cholesky_ao_num, & - tmp_cc2, cholesky_ao_num, & + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) @@ -630,9 +630,9 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do iblock = 1, nV, block_size do gam = iblock, min(nV, iblock+block_size-1) - call dgemm('T','N',nV, nO*nV, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol(1,1,gam), cholesky_ao_num, cc_space_v_ov_chol, & - cholesky_ao_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & + cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) enddo call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & @@ -663,19 +663,19 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) double precision, allocatable :: X_ovvo(:,:,:,:) double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) - allocate(tcc2(cholesky_ao_num,nV,nO), X_ovvo(nO,nV,nV,nO)) - allocate(tcc(cholesky_ao_num,nO,nV)) + allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_mo_num,nO,nV)) - call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, & - 0.d0, tcc2, cholesky_ao_num*nV) + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_mo_num*nV) - call dgemm('N','N', cholesky_ao_num*nO, nV, nO, 1.d0, & - cc_space_v_oo_chol, cholesky_ao_num*nO, t1, nO, & - 0.d0, tcc, cholesky_ao_num*nO) + call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & + 0.d0, tcc, cholesky_mo_num*nO) - call dgemm('T','N', nO*nV, nV*nO, cholesky_ao_num, 1.d0, & - tcc, cholesky_ao_num, tcc2, cholesky_ao_num, 0.d0, & + call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & X_ovvo, nO*nV) deallocate(tcc, tcc2) @@ -1160,23 +1160,23 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) 0d0, g_vir, size(g_vir,1)) double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) - allocate(tmp_k(cholesky_ao_num)) - call dgemm('N','N', cholesky_ao_num, 1, nO*nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num, t1, nO*nV, 0.d0, tmp_k, cholesky_ao_num) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) - call dgemm('T','N', nV*nV, 1, cholesky_ao_num, 2.d0, & - cc_space_v_vv_chol, cholesky_ao_num, tmp_k, cholesky_ao_num, 1.d0, & + call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & g_vir, nV*nV) deallocate(tmp_k) - allocate(tmp_vo(cholesky_ao_num,nV,nO)) - call dgemm('N','T',cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_ao_num*nV) + allocate(tmp_vo(cholesky_mo_num,nV,nO)) + call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) - allocate(tmp_vo2(cholesky_ao_num,nO,nV)) + allocate(tmp_vo2(cholesky_mo_num,nO,nV)) do beta=1,nV do i=1,nO - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) enddo enddo @@ -1189,9 +1189,9 @@ subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) enddo enddo - call dgemm('T','N', nV, nV, nO*cholesky_ao_num, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, & - tmp_vo2, cholesky_ao_num*nO, 1.d0, g_vir, nV) + call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, & + tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) end @@ -1265,15 +1265,15 @@ subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) deallocate(X_ovoo) double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) - allocate(tmp_cc(cholesky_ao_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) - call dgemm('N','T', cholesky_ao_num*nV, nO, nV, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num*nV, & + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, & t1, nO, & - 0.d0, tmp_cc, cholesky_ao_num*nV) + 0.d0, tmp_cc, cholesky_mo_num*nV) - call dgemm('T','N', nV*nO, nV*nO, cholesky_ao_num, 1.d0, & - tmp_cc, cholesky_ao_num, cc_space_v_vo_chol, cholesky_ao_num, & + call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & 0.d0, J1_tmp, nV*nO) deallocate(tmp_cc) @@ -1464,14 +1464,14 @@ subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) - allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_ao_num,nO,nO)) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) - call dgemm('N','T', cholesky_ao_num*nO, nO, nV, 1.d0, & - cc_space_v_ov_chol, cholesky_ao_num*nO, t1, nO, 0.d0, & - t1v, cholesky_ao_num*nO) + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & + t1v, cholesky_mo_num*nO) - call dgemm('T','N', nO*nO, nV*nV, cholesky_ao_num, 1.d0, & - t1v, cholesky_ao_num, cc_space_v_vv_chol, cholesky_ao_num, 0.d0, & + call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & K1tmp, nO*nO) deallocate(t1v) diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 3a868cbe..349f13b9 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,4 +1,12 @@ -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] +BEGIN_PROVIDER [ integer, cholesky_mo_num ] + implicit none + BEGIN_DOC + ! Number of Cholesky vectors in MO basis + END_DOC + cholesky_mo_num = cholesky_ao_num +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis @@ -8,7 +16,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num call set_multiple_levels_omp(.False.) !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num do j=1,mo_num do i=1,mo_num cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) @@ -19,7 +27,7 @@ BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num END_PROVIDER -BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis @@ -29,14 +37,14 @@ BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, integer :: ierr print *, 'AO->MO Transformation of Cholesky vectors' - allocate(X(mo_num,cholesky_ao_num,ao_num), stat=ierr) + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) if (ierr /= 0) then print *, irp_here, ': Allocation failed' endif - call dgemm('T','N', ao_num*cholesky_ao_num, mo_num, ao_num, 1.d0, & - cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_ao_num) - call dgemm('T','N', cholesky_ao_num*mo_num, mo_num, ao_num, 1.d0, & - X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_ao_num*mo_num) + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) deallocate(X) END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index d807f619..eb05da84 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -13,14 +13,14 @@ if (do_ao_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) - allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) do j=1,mo_num buffer_jj(:,j) = cholesky_mo_transp(:,j,j) enddo - call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - buffer_jj, cholesky_ao_num, 0.d0, & + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + buffer_jj, cholesky_mo_num, 0.d0, & buffer, mo_num*mo_num) do k = 1, mo_num @@ -36,9 +36,9 @@ do j = 1, mo_num - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, 0.d0, & buffer_jj, mo_num) do k=1,mo_num diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index 0d3fe176..0e77b6a2 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -479,9 +479,9 @@ subroutine add_integrals_to_map_cholesky !$OMP DO SCHEDULE(dynamic) do l=1,mo_num - call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_ao_num,1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,l), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_mo_num,1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & Vtmp, mo_num*mo_num) do k=1,l @@ -1364,20 +1364,20 @@ END_PROVIDER if (do_ao_cholesky) then double precision, allocatable :: buffer(:,:) - allocate (buffer(cholesky_ao_num,mo_num)) - do k=1,cholesky_ao_num + allocate (buffer(cholesky_mo_num,mo_num)) + do k=1,cholesky_mo_num do i=1,mo_num buffer(k,i) = cholesky_mo_transp(k,i,i) enddo enddo - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num,1.d0, & - buffer, cholesky_ao_num, buffer, cholesky_ao_num, 0.d0, mo_two_e_integrals_jj, mo_num) + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num,1.d0, & + buffer, cholesky_mo_num, buffer, cholesky_mo_num, 0.d0, mo_two_e_integrals_jj, mo_num) deallocate(buffer) do j=1,mo_num do i=1,mo_num mo_two_e_integrals_jj_exchange(i,j) = 0.d0 - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) enddo diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index a68ab8de..b2b68d05 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -50,15 +50,15 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) if (do_ao_cholesky) then double precision, allocatable :: buffer(:,:,:,:) double precision, allocatable :: v1(:,:,:), v2(:,:,:) - allocate(v1(cholesky_ao_num,n1,n3), v2(cholesky_ao_num,n2,n4)) + allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) allocate(buffer(n1,n3,n2,n4)) - call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_ao_num) - call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_ao_num) + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_mo_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_mo_num) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - v1, cholesky_ao_num, & - v2, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1*n3) deallocate(v1,v2) @@ -119,7 +119,7 @@ subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) idx3 = list3(i3) do i1=1,n1 idx1 = list1(i1) - do k=1,cholesky_ao_num + do k=1,cholesky_mo_num v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) enddo enddo @@ -137,15 +137,15 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] double precision, allocatable :: buffer(:,:,:) call set_multiple_levels_omp(.False.) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_mo_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) allocate(buffer(mo_num,mo_num,mo_num)) !$OMP DO do i4 = 1, mo_num - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,i4), cholesky_mo_num, 0.d0, buffer, mo_num*mo_num) do i2 = 1, mo_num do i3 = 1, mo_num do i1 = 1, mo_num @@ -203,9 +203,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_oo_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -246,9 +246,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -392,9 +392,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vo_chol, cholesky_ao_num, & - cc_space_v_vo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_vo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -435,9 +435,9 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n double precision, allocatable :: buffer(:,:,:,:) allocate(buffer(n1,n3,n2,n4)) - call dgemm('T','N', n1*n3, n2*n4, cholesky_ao_num, 1.d0, & - cc_space_v_vv_chol, cholesky_ao_num, & - cc_space_v_oo_chol, cholesky_ao_num, 0.d0, buffer, n1*n3) + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) do i4 = 1, n4 @@ -645,35 +645,35 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_ao_num, cc_nVa, cc_nVa)] +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_mo_num, cc_nVa, cc_nVa)] implicit none - call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_ao_num, cc_nVa, cc_nOa)] +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_mo_num, cc_nVa, cc_nOa)] implicit none - call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_ao_num, cc_nOa, cc_nVa)] +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_mo_num, cc_nOa, cc_nVa)] implicit none - call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_mo_num) END_PROVIDER -BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_ao_num, cc_nOa, cc_nOa)] +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, cc_nOa)] implicit none - call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_ao_num) + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_mo_num) END_PROVIDER From fba2fefb1943cff26ba7c18d1ee92448b8482b3a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 18:33:18 +0200 Subject: [PATCH 237/337] Moved loop --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 5969928a..fc5da8c0 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -501,6 +501,13 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) !$OMP DO do gam = 1, nV + + do a=1,nV + do k=1,cholesky_mo_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo + do iblock = 1, nV, block_size call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & @@ -508,12 +515,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) - do a=1,nV - do k=1,cholesky_mo_num - tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) - enddo - enddo - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & tmp_cc2, cholesky_mo_num, & From 4ad77651276305fdfbed5648a57bb9965dab636b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 12 Jul 2023 18:56:13 +0200 Subject: [PATCH 238/337] Minor changes --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index fc5da8c0..0ba46e56 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -457,8 +457,7 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) integer :: u,v,i,j,beta,gam,a,b double precision :: max_r2_local - ! Residual - !r2 = 0d0 + call set_multiple_levels_omp(.False.) !$omp parallel & !$omp shared(nO,nV,r2,cc_space_v_oovv) & From b102cc816aa709da2f105ab989816b4031daf1bd Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 15 Jul 2023 18:44:47 +0200 Subject: [PATCH 239/337] ARM perf library in parallel --- config/gfortran_armpl.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg index fb5ee1cc..370e396e 100644 --- a/config/gfortran_armpl.cfg +++ b/config/gfortran_armpl.cfg @@ -14,7 +14,7 @@ # [COMMON] FC : gfortran -g -ffree-line-length-none -I . -fPIC -march=native -LAPACK_LIB : -larmpl_lp64 +LAPACK_LIB : -larmpl_lp64_mp IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED From 467f7563797c4c32fc9597542b9d761309e3565c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 16 Jul 2023 20:04:17 +0200 Subject: [PATCH 240/337] Optimized A1 in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 48 ++++---------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0ba46e56..ec6c2afb 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -1023,56 +1023,26 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta - double precision, allocatable :: X_vooo(:,:,:,:), Y_oooo(:,:,:,:) - allocate(X_vooo(nV,nO,nO,nO), Y_oooo(nO,nO,nO,nO)) + double precision, allocatable :: Y_oooo(:,:,:,:) + allocate(Y_oooo(nO,nO,nO,nO)) ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) - !$omp parallel & - !$omp shared(nO,nV,A1,cc_space_v_oooo,cc_space_v_ovoo,X_vooo) & - !$omp private(u,v,i,j) & - !$omp default(none) - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do v = 1, nO - do u = 1, nO - A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) - enddo - enddo - enddo - enddo - !$omp end do nowait - ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & - !$omp do collapse(2) - do j = 1, nO - do i = 1, nO - do u = 1, nO - do a = 1, nV - X_vooo(a,u,i,j) = cc_space_v_ovoo(u,a,i,j) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel - call dgemm('N','N', nO, nO*nO*nO, nV, & 1d0, t1 , size(t1,1), & - X_vooo, size(X_vooo,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & 0d0, Y_oooo, size(Y_oooo,1)) !$omp parallel & - !$omp shared(nO,nV,A1,Y_oooo) & !$omp private(u,v,i,j) & - !$omp default(none) + !$omp default(shared) !$omp do collapse(2) do j = 1, nO do i = 1, nO do v = 1, nO do u = 1, nO - A1(u,v,i,j) = A1(u,v,i,j) + Y_oooo(v,u,i,j) + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) enddo enddo enddo @@ -1080,13 +1050,7 @@ subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) !$omp end do !$omp end parallel - deallocate(X_vooo,Y_oooo) - - ! A1(u,v,i,j) += cc_space_v_vooo(a,v,i,j) * t1(u,a) - call dgemm('N','N', nO, nO*nO*nO, nV, & - 1d0, t1 , size(t1,1), & - cc_space_v_vooo, size(cc_space_v_vooo,1), & - 1d0, A1 , size(A1,1)) + deallocate(Y_oooo) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) call dgemm('N','N', nO*nO, nO*nO, nV*nV, & From bd570b19c1d4ae0479b8ff4c4611ae1127605441 Mon Sep 17 00:00:00 2001 From: ydamour Date: Mon, 17 Jul 2023 17:05:48 +0200 Subject: [PATCH 241/337] fix bug restore_symmetry --- src/utils/linear_algebra.irp.f | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 65c57a76..314ad4f6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1661,7 +1661,15 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! Update i i = i + 1 enddo - copy(i:) = 0.d0 + + ! To nullify the remaining elements that are below the threshold + if (i == sze) then + if (-copy(i) <= thresh) then + copy(i) = 0d0 + endif + else + copy(i:) = 0.d0 + endif !$OMP PARALLEL if (sze>10000) & !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & From cc7b97c09b5f8a970319a3e247551c34401e731c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sat, 5 Aug 2023 01:47:48 +0200 Subject: [PATCH 242/337] Cleaning in C --- external/ezfio | 2 +- external/irpf90 | 2 +- src/utils/fortran_mmap.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 71426002..e8d85a2f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -9,7 +9,6 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) { - int i; int fd; int result; void* map; From c6b50d5f500faed66fdc13a2b2c4d2dd874fbca1 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 7 Aug 2023 16:39:53 +0200 Subject: [PATCH 243/337] found a bug in left-right coefficients in perturbation --- src/cipsi_tc_bi_ortho/selection.irp.f | 35 +++++++++++++++++++++++---- src/fci_tc_bi/selectors.irp.f | 2 ++ src/tc_bi_ortho/tc_h_eigvectors.irp.f | 5 ++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 77377554..06cf848b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -893,20 +893,45 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function enddo else if(debug_tc_pt2 == 2)then !! debugging the new version +! psi_h_alpha_tmp = 0.d0 +! alpha_h_psi_tmp = 0.d0 +! do iii = 1, N_det_selectors ! old version +! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) +! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) +! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function +! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function +! enddo psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function + do iii = 1, N_det ! old version + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) + psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function + alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function enddo if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) if(error.gt.1.d-2)then + call debug_det(det, N_int) print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E + print*,psi_h_alpha , alpha_h_psi + print*,psi_h_alpha_tmp , alpha_h_psi_tmp + print*,'selectors ' + do iii = 1, N_det_selectors ! old version + print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + print*,i_h_alpha,alpha_h_i + call debug_det(psi_selectors(1,1,iii),N_int) + enddo +! print*,'psi_det ' +! do iii = 1, N_det! old version +! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) +! call debug_det(psi_det(1,1,iii),N_int) +! enddo + stop endif endif else diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 4d3de7d0..7f93ae55 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -27,6 +27,8 @@ END_PROVIDER implicit none BEGIN_DOC ! Determinants on which we apply for perturbation. + ! psi_selectors_coef_tc(iii,1,istate) = left coefficient of the iii determinant + ! psi_selectors_coef_tc(iii,2,istate) = right coefficient of the iii determinant END_DOC integer :: i,k diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f027c38f..48257943 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -328,6 +328,11 @@ end TOUCH psi_r_coef_bi_ortho call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) deallocate(buffer) +! print*,'After diag' +! do i = 1, N_det! old version +! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) +! call debug_det(psi_det(1,1,i),N_int) +! enddo END_PROVIDER From c945e027c03c283682f6bffdb933c588069450bf Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 7 Aug 2023 16:56:10 +0200 Subject: [PATCH 244/337] fixed a bug in psi_selectors_coef --- src/cipsi_tc_bi_ortho/get_d0_good.irp.f | 8 +++--- src/cipsi_tc_bi_ortho/get_d1_good.irp.f | 36 ++++++++++++------------- src/cipsi_tc_bi_ortho/get_d2_good.irp.f | 28 +++++++++---------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f index 4270e7b8..9bba162e 100644 --- a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f @@ -53,7 +53,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij ! HOTSPOT + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT enddo end do !!!!!!!!!! @@ -72,7 +72,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do end do @@ -109,7 +109,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end do @@ -128,7 +128,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f index bc19e7e4..b2a38e02 100644 --- a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f @@ -76,7 +76,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -88,7 +88,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -114,7 +114,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -126,7 +126,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -169,7 +169,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -180,7 +180,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -211,7 +211,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -222,7 +222,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -265,7 +265,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,1) - hij_cache(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do do putj=hfix+1,mo_num @@ -274,7 +274,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,2) - hij_cache(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do @@ -293,7 +293,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,1) - hji_cache(putj,2) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do do putj=hfix+1,mo_num @@ -302,7 +302,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,2) - hji_cache(putj,1) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do @@ -342,7 +342,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -353,7 +353,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -385,7 +385,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -396,7 +396,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -445,8 +445,8 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, do k=1,N_states ! take conjugate to get contribution to instead of ! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f index 0a08c808..d01ed433 100644 --- a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -79,12 +79,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if end do @@ -103,12 +103,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end do @@ -135,7 +135,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo endif end do @@ -154,7 +154,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo endif end do @@ -189,7 +189,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij enddo end do end do @@ -210,7 +210,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji enddo end do end do @@ -239,12 +239,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo endif end do @@ -262,12 +262,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo endif end do @@ -290,7 +290,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if !! @@ -299,7 +299,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end if From 0440def3637b4fc528ffd488f117247662e12c4d Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 9 Aug 2023 16:23:09 +0200 Subject: [PATCH 245/337] added lccsd --- src/cisd/EZFIO.cfg | 8 ++++ src/cisd/NEED | 1 + src/cisd/lccsd.irp.f | 95 +++++++++++++++++++++++++++++++++++++++ src/cisd/lccsd_prov.irp.f | 44 ++++++++++++++++++ 4 files changed, 148 insertions(+) create mode 100644 src/cisd/lccsd.irp.f create mode 100644 src/cisd/lccsd_prov.irp.f diff --git a/src/cisd/EZFIO.cfg b/src/cisd/EZFIO.cfg index 4565d2df..688f802a 100644 --- a/src/cisd/EZFIO.cfg +++ b/src/cisd/EZFIO.cfg @@ -5,3 +5,11 @@ interface: ezfio size: (determinants.n_states) + +[lcc_energy] +type: double precision +doc: lccsd energy +interface: ezfio +size: (determinants.n_states) + + diff --git a/src/cisd/NEED b/src/cisd/NEED index d9ad3efc..616d021e 100644 --- a/src/cisd/NEED +++ b/src/cisd/NEED @@ -1,3 +1,4 @@ selectors_full single_ref_method davidson_undressed +dav_general_mat diff --git a/src/cisd/lccsd.irp.f b/src/cisd/lccsd.irp.f new file mode 100644 index 00000000..919c5aaa --- /dev/null +++ b/src/cisd/lccsd.irp.f @@ -0,0 +1,95 @@ +program lccsd + implicit none + BEGIN_DOC +! Linerarized CCSD +! + ! This program takes a reference Slater determinant of ROHF-like occupancy, + ! + ! and performs all single and double excitations on top of it, disregarding + ! spatial symmetry and compute the "n_states" lowest eigenstates of that CI + ! matrix (see :option:`determinants n_states`). + ! + ! This program can be useful in many cases: + ! + ! * **Ground state calculation**: if even after a :c:func:`cis` calculation, natural + ! orbitals (see :c:func:`save_natorb`) and then :c:func:`scf` optimization, you are not sure to have the lowest scf + ! solution, + ! do the same strategy with the :c:func:`cisd` executable instead of the :c:func:`cis` exectuable to generate the natural + ! orbitals as a guess for the :c:func:`scf`. + ! + ! + ! + ! * **Excited states calculations**: the lowest excited states are much likely to + ! be dominanted by single- or double-excitations. + ! Therefore, running a :c:func:`cisd` will save the "n_states" lowest states within + ! the CISD space + ! in the |EZFIO| directory, which can afterward be used as guess wave functions + ! for a further multi-state fci calculation if you specify "read_wf" = True + ! before running the fci executable (see :option:`determinants read_wf`). + ! Also, if you specify "s2_eig" = True, the cisd will only retain states + ! having the good value :math:`S^2` value + ! (see :option:`determinants expected_s2` and :option:`determinants s2_eig`). + ! If "s2_eig" = False, it will take the lowest n_states, whatever + ! multiplicity they are. + ! + ! + ! + ! Note: if you would like to discard some orbitals, use + ! :ref:`qp_set_mo_class` to specify: + ! + ! * "core" orbitals which will be always doubly occupied + ! + ! * "act" orbitals where an electron can be either excited from or to + ! + ! * "del" orbitals which will be never occupied + ! + END_DOC + PROVIDE N_states + read_wf = .False. + TOUCH read_wf + call run +end + +subroutine run + implicit none + + if(pseudo_sym)then + call H_apply_cisd_sym + else + call H_apply_cisd + endif + call get_lccsd_2 +end + +subroutine get_lccsd_2 + implicit none + integer :: i,k + double precision :: cisdq(N_states), delta_e + double precision,external :: diag_h_mat_elem + psi_coef = lccsd_coef + SOFT_TOUCH psi_coef + call save_wavefunction_truncated(save_threshold) + call ezfio_set_cisd_lcc_energy(lccsd_energies) + + print *, 'N_det = ', N_det + print*,'' + print*,'******************************' + print *, 'LCCSD Energies' + do i = 1,N_states + print *, i, lccsd_energies(i) + enddo + if (N_states > 1) then + print*,'******************************' + print*,'Excitation energies (au) (LCCSD)' + do i = 2, N_states + print*, i ,lccsd_energies(i) - lccsd_energies(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (LCCSD)' + do i = 2, N_states + print*, i ,(lccsd_energies(i) - lccsd_energies(1)) * ha_to_ev + enddo + endif + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f new file mode 100644 index 00000000..b071a8f8 --- /dev/null +++ b/src/cisd/lccsd_prov.irp.f @@ -0,0 +1,44 @@ + BEGIN_PROVIDER [ double precision, lccsd_coef, (N_det, N_states)] +&BEGIN_PROVIDER [ double precision, lccsd_energies, (N_states)] + implicit none + double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) + double precision :: ebefore, eafter, ecorr, thresh + integer :: i,it + logical :: converged + external H_u_0_nstates_openmp + allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) + thresh = 1.d-6 + converged = .False. + Dress_jj = 0.d0 + u_in = 0.d0 + it = 0 + ! initial guess + do i = 1, N_states_diag + u_in(i,i) = 1.d0 + enddo + do i = 1,N_det + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,H_jj(i)) + enddo + ebefore = H_jj(1) + do while (.not.converged) + it += 1 + print*,'N_det = ',N_det + call davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,lccsd_energies,& + N_det,N_states,N_states_diag,converged,H_u_0_nstates_openmp) + ecorr = lccsd_energies(1) - H_jj(1) + print*,'---------------------' + print*,'it = ',it + print*,'ecorr = ',ecorr + Dress_jj(1) = 0.d0 + do i = 2, N_det + Dress_jj(i) = ecorr + enddo + eafter = lccsd_energies(1) + converged = (dabs(eafter - ebefore).lt.thresh) + ebefore = eafter + enddo + do i = 1, N_states + lccsd_coef(1:N_det,i) = u_in(1:N_det,i) + enddo + +END_PROVIDER From 1a2632c280bc74eca95912fc7eec4e4b9d1f8587 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 9 Aug 2023 16:59:47 +0200 Subject: [PATCH 246/337] added condition in lccsd --- src/cisd/lccsd_prov.irp.f | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f index b071a8f8..38149ac9 100644 --- a/src/cisd/lccsd_prov.irp.f +++ b/src/cisd/lccsd_prov.irp.f @@ -31,7 +31,9 @@ print*,'ecorr = ',ecorr Dress_jj(1) = 0.d0 do i = 2, N_det - Dress_jj(i) = ecorr + if(ecorr + H_jj(i) .gt. H_jj(1))then + Dress_jj(i) = ecorr + endif enddo eafter = lccsd_energies(1) converged = (dabs(eafter - ebefore).lt.thresh) From 2e458e93bafcd652e996cd3067d652f61522a915 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 10 Aug 2023 08:38:05 +0200 Subject: [PATCH 247/337] Fixing CI --- .github/workflows/configuration.yml | 2 +- external/ezfio | 2 +- external/irpf90 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index ba37f5dd..178b394e 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config libhdf5-dev - name: zlib run: | ./configure -i zlib || echo OK diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 From ee2c470054b76bffc680f54b960daae99ed9679d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 15:53:35 +0200 Subject: [PATCH 248/337] clarified the TC-CASSCF gradients --- src/casscf_tc_bi/grad_dm.irp.f | 17 +++++----- src/casscf_tc_bi/grad_old.irp.f | 42 ++++++++++++++----------- src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 22 ++++++------- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 7f6155ab..1618adc6 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -24,8 +24,7 @@ do a=1,n_virt_orb indx = mat_idx_c_v(i,a) aa=list_virt(a) - call gradvec_tc_ia(ii,aa,res_l) - call gradvec_tc_ia(aa,ii,res_r) + call gradvec_tc_ia(ii,aa,res_l,res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) gradvec_tc_r(fff,indx)=res_r(fff) @@ -41,17 +40,21 @@ end do END_PROVIDER -subroutine gradvec_tc_ia(i,a,res) +subroutine gradvec_tc_ia(i,a,res_l, res_r) implicit none BEGIN_DOC ! doubly occupied --> virtual TC gradient ! -! Corresponds to +! Corresponds to res_r = , +! +! res_l = END_DOC integer, intent(in) :: i,a - double precision, intent(out) :: res(0:3) - res = 0.d0 - res(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + double precision, intent(out) :: res_l(0:3), res_r(0:3) + res_l = 0.d0 + res_r = 0.d0 + res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) + res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index ea6747b1..6c976d66 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -69,45 +69,51 @@ END_PROVIDER subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) BEGIN_DOC - ! eq 18 of Siegbahn et al, Physica Scripta 1980 - ! we calculate res_r = , and res_r = - ! q=hole, p=particle - ! res_l(0) = total matrix element - ! res_l(1) = one-electron part - ! res_l(2) = two-electron part - ! res_l(3) = three-electron part + ! Computes the gradient with respect to orbital rotation BRUT FORCE + ! + ! res_l = + ! + ! res_r = + ! + ! q=hole, p=particle. NOTE that on res_l it is E_qp and on res_r it is E_pq + ! + ! res_l(0) = total matrix element, res_l(1) = one-electron part, + ! + ! res_l(2) = two-electron part, res_l(3) = three-electron part + ! END_DOC implicit none integer, intent(in) :: ihole,ipart double precision, intent(out) :: res_l(0:3), res_r(0:3) integer :: mu,iii,ispin,ierr,nu,istate,ll integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) - real*8 :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states),phase + real*8 :: chi_H_mu_ex_array(0:3,N_states),mu_ex_H_phi_array(0:3,N_states),phase allocate(det_mu(N_int,2)) allocate(det_mu_ex(N_int,2)) res_l=0.D0 res_r=0.D0 -! print*,'in i_h_psi' -! print*,ihole,ipart do mu=1,n_det - ! get the string of the determinant + ! get the string of the determinant |mu> call det_extract(det_mu,mu,N_int) do ispin=1,2 - ! do the monoexcitation on it + ! do the monoexcitation on it: |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> call det_copy(det_mu,det_mu_ex,N_int) call do_signed_mono_excitation(det_mu,det_mu_ex,nu & ,ihole,ipart,ispin,phase,ierr) + ! |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> if (ierr.eq.1) then - call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & - ,N_det,psi_det_size,N_states,i_H_chi_array,i_H_phi_array) -! print*,i_H_chi_array(1,1),i_H_phi_array(1,1) + ,N_det,psi_det_size,N_states,chi_H_mu_ex_array,mu_ex_H_phi_array) + ! chi_H_mu_ex_array = + ! mu_ex_H_phi_array = do istate=1,N_states - do ll = 0,3 - res_l(ll)+=i_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase - res_r(ll)+=i_H_chi_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + do ll = 0,3 ! loop over the body components (1e,2e,3e) + !res_l = \sum_mu c_mu^l = + res_l(ll)+= mu_ex_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + !res_r = \sum_mu c_mu^r = + res_r(ll)+= chi_H_mu_ex_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase enddo end do end if diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 8524253a..e96e738e 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,7 +90,7 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end -subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_chi_array,i_H_phi_array) +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,chi_H_i_array,i_H_phi_array) use bitmasks implicit none BEGIN_DOC @@ -116,7 +116,7 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) integer(bit_kind), intent(in) :: key(Nint,2) double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) - double precision, intent(out) :: i_H_chi_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + double precision, intent(out) :: chi_H_i_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) integer :: i, ii,j double precision :: phase @@ -131,7 +131,7 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c ASSERT (Ndet_max >= Ndet) allocate(idx(0:Ndet)) - i_H_chi_array = 0.d0 + chi_H_i_array = 0.d0 i_H_phi_array = 0.d0 call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) @@ -142,10 +142,10 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c ! computes !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) - i_H_chi_array(0,1) = i_H_chi_array(0,1) + coef_l(i,1)*htot - i_H_chi_array(1,1) = i_H_chi_array(1,1) + coef_l(i,1)*hmono - i_H_chi_array(2,1) = i_H_chi_array(2,1) + coef_l(i,1)*htwoe - i_H_chi_array(3,1) = i_H_chi_array(3,1) + coef_l(i,1)*hthree + chi_H_i_array(0,1) = chi_H_i_array(0,1) + coef_l(i,1)*htot + chi_H_i_array(1,1) = chi_H_i_array(1,1) + coef_l(i,1)*hmono + chi_H_i_array(2,1) = chi_H_i_array(2,1) + coef_l(i,1)*htwoe + chi_H_i_array(3,1) = chi_H_i_array(3,1) + coef_l(i,1)*hthree ! computes !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) @@ -163,10 +163,10 @@ subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,i_H_c !DIR$ FORCEINLINE call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) do j = 1, Nstate - i_H_chi_array(0,j) = i_H_chi_array(0,j) + coef_l(i,j)*htot - i_H_chi_array(1,j) = i_H_chi_array(1,j) + coef_l(i,j)*hmono - i_H_chi_array(2,j) = i_H_chi_array(2,j) + coef_l(i,j)*htwoe - i_H_chi_array(3,j) = i_H_chi_array(3,j) + coef_l(i,j)*hthree + chi_H_i_array(0,j) = chi_H_i_array(0,j) + coef_l(i,j)*htot + chi_H_i_array(1,j) = chi_H_i_array(1,j) + coef_l(i,j)*hmono + chi_H_i_array(2,j) = chi_H_i_array(2,j) + coef_l(i,j)*htwoe + chi_H_i_array(3,j) = chi_H_i_array(3,j) + coef_l(i,j)*hthree enddo ! computes !DIR$ FORCEINLINE From cc20c97eda6bdf4240c7d689022f37a867bce81a Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 17:07:40 +0200 Subject: [PATCH 249/337] all the one-body gradients are correct for TC-CASSCF --- src/casscf_tc_bi/grad_dm.irp.f | 59 +++++++++++++++++++++++++++++++-- src/casscf_tc_bi/grad_old.irp.f | 22 +++++++----- 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 1618adc6..00b20d41 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -1,11 +1,29 @@ BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] &BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + BEGIN_DOC +! gradvec_tc_r(0:3,i) = +! +! gradvec_tc_l(0:3,i) = +! +! where the indices "i" corresponds to E_q^p(i) +! +! i = mat_idx_c_a(q,p) +! +! and gradvec_tc_r/l(0) = full matrix element +! +! gradvec_tc_r/l(1) = one-body part + +! gradvec_tc_r/l(2) = two-body part + +! gradvec_tc_r/l(3) = three-body part + END_DOC implicit none integer :: ii,tt,aa,indx integer :: i,t,a,fff double precision :: res_l(0:3), res_r(0:3) gradvec_tc_l = 0.d0 gradvec_tc_r = 0.d0 + ! computing the core/inactive --> virtual orbitals gradients do i=1,n_core_inact_orb ii=list_core_inact(i) do t=1,n_act_orb @@ -33,9 +51,15 @@ end do do t=1,n_act_orb + tt=list_act(t) do a=1,n_virt_orb - indx = mat_idx_a_v(i,a) -! gradvec_tc_l(indx)=gradvec_ta(t,a) + aa=list_virt(a) + indx = mat_idx_a_v(t,a) + call gradvec_tc_ta(tt,aa,res_l, res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo end do end do END_PROVIDER @@ -65,7 +89,7 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) ! ! Corresponds to res_r = ! -! res_l = +! res_l = END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) @@ -83,3 +107,32 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) enddo end + +subroutine gradvec_tc_ta(t,a,res_l, res_r) + implicit none + BEGIN_DOC +! active --> virtual TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: t,a + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,m + double precision :: dm + res_r = 0.d0 + res_l = 0.d0 +! do rr = 1, n_act_orb +! r = list_act(rr) +! res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) +! res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) +! enddo + do m = 1, mo_num + res_r(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(a,m,1,1) & + -mo_bi_ortho_tc_one_e(m,a) * tc_transition_matrix_mo(m,t,1,1) + res_l(1) += mo_bi_ortho_tc_one_e(a,m) * tc_transition_matrix_mo(t,m,1,1) & + -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,a,1,1) + enddo + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index 6c976d66..e8440513 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -38,15 +38,19 @@ enddo enddo enddo -! do indx=1,nMonoEx -! ihole=excit(1,indx) -! ipart=excit(2,indx) -! call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) -! do ll = 0, 3 -! gradvec_detail_left_old (ll,indx)=res_l(ll) -! gradvec_detail_right_old(ll,indx)=res_r(ll) -! enddo -! end do + + do tt = 1, n_act_orb + ihole = list_act(tt) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_a_v(tt,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo real*8 :: norm_grad_left, norm_grad_right norm_grad_left=0.d0 From a15055e9648d65fd4c95513ccb9f7ed0b765199c Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 10 Aug 2023 17:26:33 +0200 Subject: [PATCH 250/337] optimization of one-body TC-CASSCF gradients --- src/casscf_tc_bi/grad_dm.irp.f | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 00b20d41..6e06f6ce 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -93,17 +93,17 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,ss,s,m + integer :: rr,r,ss,s,m,mm double precision :: dm res_r = 0.d0 - do m = 1, mo_num - res_r(1) += mo_bi_ortho_tc_one_e(i,m) * tc_transition_matrix_mo(t,m,1,1) & - -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,i,1,1) - enddo res_l = 0.d0 - do m = 1, mo_num - res_l(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(i,m,1,1) & - -mo_bi_ortho_tc_one_e(m,i) * tc_transition_matrix_mo(m,t,1,1) + res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) + res_l(1) += 2.D0 * mo_bi_ortho_tc_one_e(t,i) + + do rr = 1, n_act_orb + r = list_act(rr) + res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) + res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo end @@ -123,16 +123,10 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) double precision :: dm res_r = 0.d0 res_l = 0.d0 -! do rr = 1, n_act_orb -! r = list_act(rr) -! res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) -! res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) -! enddo - do m = 1, mo_num - res_r(1) += mo_bi_ortho_tc_one_e(t,m) * tc_transition_matrix_mo(a,m,1,1) & - -mo_bi_ortho_tc_one_e(m,a) * tc_transition_matrix_mo(m,t,1,1) - res_l(1) += mo_bi_ortho_tc_one_e(a,m) * tc_transition_matrix_mo(t,m,1,1) & - -mo_bi_ortho_tc_one_e(m,t) * tc_transition_matrix_mo(m,a,1,1) + do rr = 1, n_act_orb + r = list_act(rr) + res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) + res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) enddo end From 5dc4fb29284c496cf3b6921e9c9422e94b972a97 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 16 Aug 2023 14:06:29 +0200 Subject: [PATCH 251/337] naive two rdm in tc works for He in cisd and bi ortho orbitals --- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 60 ++++++++++++ src/tc_bi_ortho/two_rdm_naive.irp.f | 132 ++++++++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 src/tc_bi_ortho/test_tc_two_rdm.irp.f create mode 100644 src/tc_bi_ortho/two_rdm_naive.irp.f diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f new file mode 100644 index 00000000..ecdeef43 --- /dev/null +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -0,0 +1,60 @@ +program test_tc_rdm + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call test() + +end + +subroutine test + implicit none + integer :: h1,p1,h2,p2,i,j,istate + double precision :: rdm, integral, accu,ref + double precision :: hmono, htwoe, hthree, htot + accu = 0.d0 + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + enddo + enddo + enddo + enddo + accu *= 0.5d0 + print*,'accu = ',accu +! print*,tc_two_rdm(1,1,1,1),mo_bi_ortho_tc_two_e(1,1,1,1) + ref = 0.d0 + do i = 1, N_det + do j = 1, N_det +! if(i.ne.j)cycle + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + do istate = 1,N_states +! print*,'i,j',i,j,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe + ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe + enddo + enddo + enddo + print*,' ref = ',ref + +end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f new file mode 100644 index 00000000..9694c653 --- /dev/null +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -0,0 +1,132 @@ +BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,s,q,r) = psi_det(i) + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo + if(degree == 2)then + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + else if(degree==1)then +! cycle + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + + ! run over the electrons of opposite spin than the excitation + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + ! run over the electrons of same spin than the excitation + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + endif + else if(degree == 0)then + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) +! print*,'contrib',contrib + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) + enddo + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + s1 = 1 ! alpha electrons + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of alpha-beta electrons + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + ! run over the couple of alpha-alpha electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + enddo + s1 = 2 + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of beta-beta electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + enddo + enddo + endif + enddo + enddo + +END_PROVIDER + +subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) + implicit none + integer, intent(in) :: h1,p1,h2,p2,s1,s2,sze + double precision, intent(in) :: contrib + double precision, intent(inout) :: array(sze, sze, sze, sze) + integer :: istate + if(s1.ne.s2)then + array(p1,h1,p2,h2) += contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + else ! same spin double excitation + array(p1,h1,p2,h2) += contrib + ! exchange + ! exchanging the holes + array(p2,h1,p1,h2) -= contrib + ! exchanging the particles + array(p1,h2,p2,h1) -= contrib + + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + ! exchange + ! exchanging the holes + array(p1,h2,p2,h1) -= contrib + ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + endif + +end From 0ffaf820a2bc156e021e0f68bddf119e04ce82b5 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 16 Aug 2023 18:07:50 +0200 Subject: [PATCH 252/337] tc two rdm works for O CISD in biorthonormal basis --- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 19 ++++++++++++++++--- src/tc_bi_ortho/two_rdm_naive.irp.f | 14 +++----------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index ecdeef43..3e556312 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -35,6 +35,10 @@ subroutine test do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) +! if(dabs(rdm).gt.1.d-10)then +! print*,h1,p1,h2,p2 +! print*,rdm,integral,rdm*integral +! endif accu += integral * rdm enddo enddo @@ -42,19 +46,28 @@ subroutine test enddo accu *= 0.5d0 print*,'accu = ',accu -! print*,tc_two_rdm(1,1,1,1),mo_bi_ortho_tc_two_e(1,1,1,1) +! print*,mo_bi_ortho_tc_two_e(2,15,2,1) +! print*,mo_bi_ortho_tc_two_e(15,2,2,1) +! print*,mo_bi_ortho_tc_two_e(2,1,2,15) +! print*,mo_bi_ortho_tc_two_e(2,1,15,2) ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.ne.j)cycle +! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe +! print*,'i,j',i,j +! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe +! if(i.ne.j)then ! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe +! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe +! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo print*,' ref = ',ref + print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 9694c653..8fd34975 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -30,7 +30,6 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) else if(degree==1)then -! cycle ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -48,13 +47,12 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) enddo endif else if(degree == 0)then +! cycle contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) -! print*,'contrib',contrib do istate = 2, N_states contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) enddo @@ -115,18 +113,12 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) else ! same spin double excitation array(p1,h1,p2,h2) += contrib ! exchange - ! exchanging the holes - array(p2,h1,p1,h2) -= contrib ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + ! exchanging the array(p1,h2,p2,h1) -= contrib - ! permutation for particle symmetry array(p2,h2,p1,h1) += contrib - ! exchange - ! exchanging the holes - array(p1,h2,p2,h1) -= contrib - ! exchanging the particles - array(p2,h1,p1,h2) -= contrib endif end From 6b56e213d89cd7ee07a33ec460ed7613fb9a28e6 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 17 Aug 2023 18:17:46 +0200 Subject: [PATCH 253/337] right two-body inactive-virtual gradients implemented --- src/casscf_tc_bi/grad_dm.irp.f | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 6e06f6ce..77336c93 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -79,7 +79,22 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) res_r = 0.d0 res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) - + integer :: j,t,r,jj,tt,rr + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) + res_l(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + enddo + do tt = 1, n_act_orb + t = list_act(tt) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & + ) + enddo + enddo end subroutine gradvec_tc_it(i,t,res_l, res_r) From a4d7648bb011ef80dd210baf411f1a74a67671a0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Aug 2023 09:56:07 +0200 Subject: [PATCH 254/337] Fix segfault in cholesky due to array syntax --- src/ao_two_e_ints/cholesky.irp.f | 164 +++++++++++++------------ src/ccsd/ccsd_space_orb_sub_chol.irp.f | 51 ++++---- 2 files changed, 105 insertions(+), 110 deletions(-) diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 175ccf6e..2977f0f4 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -23,38 +23,38 @@ END_PROVIDER ! ! Last dimension of cholesky_ao is cholesky_ao_num END_DOC - + integer :: rank, ndim double precision :: tau double precision, pointer :: L(:,:), L_old(:,:) - - + + double precision :: s double precision, parameter :: dscale = 1.d0 - + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) integer, allocatable :: Lset(:), Dset(:), addr(:,:) logical, allocatable :: computed(:) - + integer :: i,j,k,m,p,q, qj, dj, p2, q2 integer :: N, np, nq - + double precision :: Dmax, Dmin, Qmax, f double precision, external :: get_ao_two_e_integral logical, external :: ao_two_e_integral_zero - + double precision, external :: ao_two_e_integral integer :: block_size, iblock, ierr - + double precision :: mem double precision, external :: memory_of_double, memory_of_int - + integer, external :: getUnitAndOpen integer :: iunit - + ndim = ao_num*ao_num deallocate(cholesky_ao) - + if (read_ao_cholesky) then print *, 'Reading Cholesky vectors from disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') @@ -63,11 +63,11 @@ END_PROVIDER read(iunit) cholesky_ao close(iunit) cholesky_ao_num = rank - + else - + PROVIDE nucl_coord - + if (do_direct_integrals) then if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then ! Trigger providers inside ao_two_e_integral @@ -76,16 +76,16 @@ END_PROVIDER else PROVIDE ao_two_e_integrals_in_map endif - + tau = ao_cholesky_threshold - + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) call check_mem(mem, irp_here) - + call print_memory_usage() allocate(L(ndim,1)) - + print *, '' print *, 'Cholesky decomposition of AO integrals' print *, '======================================' @@ -93,13 +93,13 @@ END_PROVIDER print *, '============ =============' print *, ' Rank Threshold' print *, '============ =============' - - + + rank = 0 - + allocate( D(ndim), Lset(ndim), Dset(ndim) ) allocate( addr(3,ndim) ) - + ! 1. k=0 do j=1,ao_num @@ -110,7 +110,7 @@ END_PROVIDER addr(3,k) = (i-1)*ao_num + j enddo enddo - + if (do_direct_integrals) then !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) do i=1,ndim @@ -127,9 +127,9 @@ END_PROVIDER enddo !$OMP END PARALLEL DO endif - + Dmax = maxval(D) - + ! 2. np=0 do p=1,ndim @@ -138,26 +138,26 @@ END_PROVIDER Lset(np) = p endif enddo - + ! 3. N = 0 - + ! 4. i = 0 - + ! 5. do while ( (Dmax > tau).and.(rank < ndim) ) ! a. i = i+1 - + s = 0.01d0 - + ! Inrease s until the arrays fit in memory do while (.True.) - + ! b. Dmin = max(s*Dmax,tau) - + ! c. nq=0 do p=1,np @@ -166,30 +166,30 @@ END_PROVIDER Dset(nq) = Lset(p) endif enddo - + call total_memory(mem) mem = mem & + np*memory_of_double(nq) &! Delta(np,nq) + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) - + if (mem > qp_max_mem) then s = s*2.d0 else exit endif - + if ((s > 1.d0).or.(nq == 0)) then call print_memory_usage() print *, 'Not enough memory. Reduce cholesky threshold' stop -1 endif - + enddo - + ! d., e. block_size = max(N,24) - + L_old => L allocate(L(ndim,rank+nq), stat=ierr) if (ierr /= 0) then @@ -197,48 +197,52 @@ END_PROVIDER print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' stop -1 endif - - !$OMP PARALLEL DO PRIVATE(k) + + !$OMP PARALLEL DO PRIVATE(k,j) do k=1,rank - L(:,k) = L_old(:,k) + do j=1,ndim + L(j,k) = L_old(j,k) + enddo enddo !$OMP END PARALLEL DO - + deallocate(L_old) - + allocate(Delta(np,nq), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Delta(np,nq))' stop -1 endif - + allocate(Ltmp_p(np,block_size), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' stop -1 endif - + allocate(Ltmp_q(nq,block_size), stat=ierr) if (ierr /= 0) then call print_memory_usage() print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' stop -1 endif - - + + allocate(computed(nq)) - + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) - + !$OMP DO do q=1,nq - Delta(:,q) = 0.d0 + do j=1,np + Delta(j,q) = 0.d0 + enddo computed(q) = .False. enddo !$OMP ENDDO NOWAIT - + !$OMP DO do k=1,N do p=1,np @@ -249,36 +253,36 @@ END_PROVIDER enddo enddo !$OMP END DO NOWAIT - + !$OMP BARRIER !$OMP END PARALLEL - + if (N>0) then call dgemm('N','T', np, nq, N, -1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) endif - + ! f. Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo - + ! g. - + iblock = 0 do j=1,nq - + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit ! i. rank = N+j - + if (iblock == block_size) then call dgemm('N','T',np,nq,block_size,-1.d0, & Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) iblock = 0 endif - + ! ii. do dj=1,nq qj = Dset(dj) @@ -286,9 +290,9 @@ END_PROVIDER exit endif enddo - + L(1:ndim, rank) = 0.d0 - + if (.not.computed(dj)) then m = dj !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) @@ -314,16 +318,16 @@ END_PROVIDER do p=1,np Ltmp_p(p,iblock) = Delta(p,dj) enddo - + ! iv. if (iblock > 1) then call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& Ltmp_p(1,iblock), 1) endif - + ! iii. f = 1.d0/dsqrt(Qmax) - + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) !$OMP DO do p=1,np @@ -332,38 +336,38 @@ END_PROVIDER D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) enddo !$OMP END DO - + !$OMP DO do q=1,nq Ltmp_q(q,iblock) = L(Dset(q), rank) enddo !$OMP END DO - + !$OMP END PARALLEL - + Qmax = D(Dset(1)) do q=1,nq Qmax = max(Qmax, D(Dset(q))) enddo - + enddo - + print '(I10, 4X, ES12.3)', rank, Qmax - + deallocate(computed) deallocate(Delta) deallocate(Ltmp_p) deallocate(Ltmp_q) - + ! i. N = rank - + ! j. Dmax = D(Lset(1)) do p=1,np Dmax = max(Dmax, D(Lset(p))) enddo - + np=0 do p=1,ndim if ( dscale*dscale*Dmax*D(p) > tau*tau ) then @@ -371,9 +375,9 @@ END_PROVIDER Lset(np) = p endif enddo - + enddo - + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) if (ierr /= 0) then call print_memory_usage() @@ -387,10 +391,10 @@ END_PROVIDER !$OMP END PARALLEL DO deallocate(L) cholesky_ao_num = rank - + print *, '============ =============' print *, '' - + if (write_ao_cholesky) then print *, 'Writing Cholesky vectors to disk...' iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') @@ -401,9 +405,9 @@ END_PROVIDER endif endif - + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' print *, '' - + END_PROVIDER - + diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 0ba46e56..54ebff73 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -252,7 +252,8 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) do a = 1, nV do j = 1, nO do i = 1, nO - W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) +! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) enddo enddo enddo @@ -514,10 +515,10 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & 0.d0, tmpB1, nV*block_size) - call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, 1.d0, & - cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & - tmp_cc2, cholesky_mo_num, & - 1.d0, tmpB1, nV*block_size) + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & + 1.d0, tmpB1, nV*block_size) do beta = iblock, min(nV, iblock+block_size-1) do b = 1, nV @@ -1107,37 +1108,27 @@ subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) double precision, intent(in) :: t2(nO, nO, nV, nV) double precision, intent(out) :: g_occ(nO, nO) - integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + g_occ = H_oo call dgemm('N','N',nO,nO,nV, & 1d0, t1, size(t1,1), & cc_space_f_vo, size(cc_space_f_vo,1), & - 0d0, g_occ, size(g_occ,1)) + 1d0, g_occ, size(g_occ,1)) - !$omp parallel & - !$omp shared(nO,nV,g_occ,H_oo, cc_space_v_ovoo,t1) & - !$omp private(i,j,a,u) & - !$omp default(none) - !$omp do - do i = 1, nO - do u = 1, nO - g_occ(u,i) = g_occ(u,i) + H_oo(u,i) - enddo - enddo - !$omp end do + double precision, allocatable :: X(:) + allocate(X(cholesky_mo_num)) + call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, 1, 0.d0, X, 1) - !$omp do - do i = 1, nO - do j = 1, nO - do a = 1, nV - do u = 1, nO - g_occ(u,i) = g_occ(u,i) + (2d0 * cc_space_v_ovoo(u,a,i,j) - cc_space_v_ovoo(u,a,j,i)) * t1(j,a) - enddo - enddo - enddo - enddo - !$omp end do - !$omp end parallel + call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + X, 1, 1.d0, g_occ, 1) + deallocate(X) + + call dgemv('T',nO*nV,nO*nO,-1.d0, & + cc_space_v_ovoo, nO*nV, & + t1, 1, 1.d0, g_occ, 1) end From e2416a2d680e84137d6a44d335ca09f39bda5fad Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Aug 2023 10:05:48 +0200 Subject: [PATCH 255/337] Fix openMP compilation in CCSD --- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f index 01068f4f..b59dc0bb 100644 --- a/src/ccsd/ccsd_space_orb_sub_chol.irp.f +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -244,7 +244,7 @@ subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) allocate(W_oovo(nO,nO,nV,nO)) !$omp parallel & - !$omp shared(nO,nV,cc_space_v_vooo,W_oovo) & + !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & !$omp private(u,a,i,j) & !$omp default(none) do u = 1, nO From 30c238656f03e5c4b45fc31cb4f67dc3231c2d2d Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 21 Aug 2023 10:06:58 +0200 Subject: [PATCH 256/337] Gradient for inactive-->virtual work --- src/casscf_tc_bi/grad_dm.irp.f | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index 77336c93..d7d7046d 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -93,6 +93,10 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & ) + res_l(2) += -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + ) enddo enddo end From f9fdd1effdb589f37f264010d3cea7e7bd3f26c8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 21 Aug 2023 14:54:41 +0200 Subject: [PATCH 257/337] python -> python3 --- ocaml/tests/test_pub.py | 2 +- ocaml/tests/test_task_server.py | 2 +- scripts/Hn.py | 3 ++- scripts/qp_exc_energy.py | 3 ++- scripts/utility/qp_json.py | 3 ++- src/nuclei/write_pt_charges.py | 2 +- 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/ocaml/tests/test_pub.py b/ocaml/tests/test_pub.py index e4a883ee..be577685 100755 --- a/ocaml/tests/test_pub.py +++ b/ocaml/tests/test_pub.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import zmq import sys, os diff --git a/ocaml/tests/test_task_server.py b/ocaml/tests/test_task_server.py index dac14083..ebbb07ae 100755 --- a/ocaml/tests/test_task_server.py +++ b/ocaml/tests/test_task_server.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import zmq import sys, os diff --git a/scripts/Hn.py b/scripts/Hn.py index 0f938510..55a958f7 100644 --- a/scripts/Hn.py +++ b/scripts/Hn.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + import sys from math import * arg = sys.argv diff --git a/scripts/qp_exc_energy.py b/scripts/qp_exc_energy.py index 7e7f1d67..44136311 100755 --- a/scripts/qp_exc_energy.py +++ b/scripts/qp_exc_energy.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + # Computes the error on the excitation energy of a CIPSI run. def student(p,df): diff --git a/scripts/utility/qp_json.py b/scripts/utility/qp_json.py index 09ffe1be..5cba9ff2 100644 --- a/scripts/utility/qp_json.py +++ b/scripts/utility/qp_json.py @@ -1,4 +1,5 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + import os import json diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index f5007090..03ac859b 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import os import sys From b2fa6b0b9c0da217d87ff7fc348fef87a76a0e3b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 21 Aug 2023 11:17:58 +0200 Subject: [PATCH 258/337] inactive --> active gradient are OK for real ! --- src/casscf_tc_bi/grad_dm.irp.f | 27 +++++++++++++++++++-- src/tc_bi_ortho/two_rdm_naive.irp.f | 37 ++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index d7d7046d..f62acbdd 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -112,8 +112,7 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) END_DOC integer, intent(in) :: i,t double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,ss,s,m,mm - double precision :: dm + integer :: rr,r,j,jj,u,uu,v,vv res_r = 0.d0 res_l = 0.d0 res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) @@ -124,6 +123,30 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo + + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += 2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + enddo + enddo + do rr = 1, n_act_orb + r = list_act(rr) + do uu = 1, n_act_orb + u = list_act(uu) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & + ) + do vv = 1, n_act_orb + v = list_act(vv) + res_r(2) += 0.5d0 * ( & + mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + enddo + enddo + enddo end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 8fd34975..3963d09e 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,7 +1,7 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] implicit none BEGIN_DOC - ! tc_two_rdm(p,s,q,r) = = CHEMIST NOTATION END_DOC integer :: i,j,istate,m,mm,nn integer :: exc(0:2,2,2) @@ -13,7 +13,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] other_spin(1) = 2 other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) - tc_two_rdm = 0.d0 + tc_two_rdm_chemist = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -28,7 +28,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) enddo if(degree == 2)then - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -39,7 +39,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -47,7 +47,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo endif else if(degree == 0)then @@ -68,7 +68,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] m = occ(mm,s2) h2 = m p2 = m - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -77,7 +77,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] h2 = m p2 = m if(h2.le.h1)cycle - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo enddo s1 = 2 @@ -91,7 +91,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] h2 = m p2 = m if(h2.le.h1)cycle - call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) enddo enddo endif @@ -122,3 +122,22 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) endif end + + +BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION + END_DOC + integer :: p,q,r,s + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm(p,q,s,r) = tc_two_rdm_chemist(p,s,q,r) + enddo + enddo + enddo + enddo + +END_PROVIDER From 7afc7e5fa4f645f36d8c2100364b792889f3434b Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 22 Aug 2023 10:49:59 +0200 Subject: [PATCH 259/337] OpenMP in MO optimization --- external/ezfio | 2 +- external/irpf90 | 2 +- ...optimization.irp.f => cipsi_orb_opt.irp.f} | 2 +- .../state_average_energy.irp.f | 17 +- src/two_body_rdm/state_av_act_2rdm.irp.f | 10 +- .../state_av_full_orb_2_rdm.irp.f | 320 +++++++++++------- src/two_body_rdm/two_e_dm_mo.irp.f | 5 +- 7 files changed, 231 insertions(+), 127 deletions(-) rename src/mo_optimization/{optimization.irp.f => cipsi_orb_opt.irp.f} (96%) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f similarity index 96% rename from src/mo_optimization/optimization.irp.f rename to src/mo_optimization/cipsi_orb_opt.irp.f index 9892b3e3..ae3aa1bf 100644 --- a/src/mo_optimization/optimization.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -15,7 +15,7 @@ subroutine run_optimization logical :: not_converged character (len=100) :: filename - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals not_converged = .True. nb_iter = 0 diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f index 2cd063da..05aec18a 100644 --- a/src/mo_optimization/state_average_energy.irp.f +++ b/src/mo_optimization/state_average_energy.irp.f @@ -39,17 +39,24 @@ subroutine state_average_energy(energy) double precision :: get_two_e_integral double precision :: mono_e, bi_e integer :: i,j,k,l - + + energy = nuclear_repulsion ! mono electronic part + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i,j,k,l,mono_e, bi_e) & + !$OMP SHARED(mo_num, mo_integrals_map, two_e_dm_mo, one_e_dm_mo, energy, & + !$OMP mo_one_e_integrals) mono_e = 0d0 + !$OMP DO do j = 1, mo_num do i = 1, mo_num mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) enddo enddo + !$OMP END DO NOWAIT ! bi electronic part bi_e = 0d0 + !$OMP DO do l = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -59,13 +66,17 @@ subroutine state_average_energy(energy) enddo enddo enddo + !$OMP END DO ! State average energy - energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + !$OMP CRITICAL + energy = energy + mono_e + 0.5d0 * bi_e + !$OMP END CRITICAL + !$OMP END PARALLEL ! Check !call print_energy_components - + print*,'State average energy:', energy !print*,ci_energy diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f index cd417a9d..ea636212 100644 --- a/src/two_body_rdm/state_av_act_2rdm.irp.f +++ b/src/two_body_rdm/state_av_act_2rdm.irp.f @@ -17,12 +17,12 @@ state_weights = state_average_weight integer :: ispin ! condition for alpha/beta spin - print*,'' - print*,'' - print*,'' - print*,'providint state_av_act_2_rdm_ab_mo ' +! print*,'' +! print*,'' +! print*,'' +! print*,'Providing state_av_act_2_rdm_ab_mo ' ispin = 3 - print*,'ispin = ',ispin +! print*,'ispin = ',ispin state_av_act_2_rdm_ab_mo = 0.d0 call wall_time(wall_1) double precision :: wall_1, wall_2 diff --git a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f index 2e44665d..5fb9e475 100644 --- a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f +++ b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f @@ -4,7 +4,7 @@ state_av_full_occ_2_rdm_ab_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons +! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -12,11 +12,19 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2 ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" -! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core state_av_full_occ_2_rdm_ab_mo = 0.d0 + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_ab_mo, state_av_full_occ_2_rdm_ab_mo) + + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -25,15 +33,17 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - ! alph beta alph beta - state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & + ! alph beta alph beta + state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_ab_mo(l,k,j,i) enddo enddo enddo enddo - !! BETA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! BETA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -45,9 +55,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA ACTIVE - BETA inactive - !! + !! ALPHA ACTIVE - BETA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -59,9 +71,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - BETA INACTIVE - !! + !! ALPHA INACTIVE - BETA INACTIVE + !! + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -70,13 +84,15 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! BETA ACTIVE - ALPHA CORE - !! + !! BETA ACTIVE - ALPHA CORE + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -88,9 +104,11 @@ enddo enddo enddo - + !$OMP END DO + !! ALPHA ACTIVE - BETA CORE - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -102,9 +120,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA CORE - BETA CORE - !! + !! ALPHA CORE - BETA CORE + !! + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -113,9 +133,11 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_aa_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] @@ -123,7 +145,7 @@ state_av_full_occ_2_rdm_aa_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons +! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -131,13 +153,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_aa_mo, state_av_full_occ_2_rdm_aa_mo) !! PURE ACTIVE PART ALPHA-ALPHA - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -152,74 +181,84 @@ enddo enddo enddo - !! ALPHA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! ALPHA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - ALPHA INACTIVE + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!! -!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!! CAN BE USED +!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE - + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_bb_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_bb_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * ! @@ -227,13 +266,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_bb_mo, state_av_full_occ_2_rdm_bb_mo) !! PURE ACTIVE PART beta-beta - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -242,80 +288,90 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & + state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_bb_mo(l,k,j,i) enddo enddo enddo enddo - !! beta ACTIVE - beta inactive - !! + !$OMP END DO + !! beta ACTIVE - beta inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! beta INACTIVE - beta INACTIVE + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE - + !$OMP END DO + !! beta CORE - beta CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_spin_trace_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_spin_trace_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * \sum_{sigma,sigma'} ! @@ -324,14 +380,22 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + PROVIDE n_core_orb list_core + + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_spin_trace_mo, state_av_full_occ_2_rdm_spin_trace_mo) + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !! PURE ACTIVE PART SPIN-TRACE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -340,128 +404,146 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & + state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & state_av_act_2_rdm_spin_trace_mo(l,k,j,i) enddo enddo enddo enddo + !$OMP END DO - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! BETA-BETA !!!!! - !! beta ACTIVE - beta inactive + !! beta ACTIVE - beta inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta INACTIVE - beta INACTIVE + !$OMP END DO + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE + !$OMP END DO + !! beta CORE - beta CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-ALPHA !!!!! - !! ALPHA ACTIVE - ALPHA inactive + !! ALPHA ACTIVE - ALPHA inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-BETA + BETA-ALPHA !!!!! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -474,14 +556,16 @@ ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) ! BETA INACTIVE - ALPHA ACTIVE - ! beta alph beta alpha + ! beta alph beta alpha state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! alph beta alph beta + ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - BETA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - BETA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -491,31 +575,35 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - !! BETA ACTIVE - ALPHA CORE + !! BETA ACTIVE - ALPHA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - !! ALPHA ACTIVE - BETA CORE + !! ALPHA ACTIVE - BETA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - BETA CORE + !$OMP END DO + !! ALPHA CORE - BETA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -525,7 +613,9 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0D0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 99be1f54..04c44f61 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,6 +16,9 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate + !$OMP PARALLEL DO PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_core_inact_act_orb, list_core_inact_act, & + !$OMP two_e_dm_mo, state_av_full_occ_2_rdm_spin_trace_mo) do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) do k=1,n_core_inact_act_orb @@ -29,7 +32,7 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo enddo - two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) + !$OMP END PARALLEL DO END_PROVIDER From c0b221f64706ef9a52d71a5b1655a16e04a69ff1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 22 Aug 2023 11:27:36 +0200 Subject: [PATCH 260/337] Change defaults in mo_optimization --- src/mo_optimization/EZFIO.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index e6aa2d67..078da3a2 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -2,7 +2,7 @@ type: character*(32) doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian interface: ezfio,provider,ocaml -default: full +default: diag [n_det_max_opt] type: integer @@ -14,7 +14,7 @@ default: 200000 type: integer doc: Maximal number of iterations for the orbital optimization interface: ezfio,provider,ocaml -default: 20 +default: 10 [thresh_opt_max_elem_grad] type: double precision From 50f2fb0bfad74a0c6a1cfc2c71354e1690b74e6c Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 22 Aug 2023 13:48:01 +0200 Subject: [PATCH 261/337] all gradients are OK ! --- src/casscf_tc_bi/grad_dm.irp.f | 111 ++++++++++++++++++++++++++++---- src/casscf_tc_bi/grad_old.irp.f | 2 + 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index f62acbdd..be19e6f0 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -50,11 +50,13 @@ end do end do +! print*,'DM grad' do t=1,n_act_orb tt=list_act(t) do a=1,n_virt_orb aa=list_virt(a) indx = mat_idx_a_v(t,a) +! print*,indx,t,a call gradvec_tc_ta(tt,aa,res_l, res_r) do fff = 0,3 gradvec_tc_l(fff,indx)=res_l(fff) @@ -83,7 +85,7 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) do jj = 1, n_core_inact_orb j = list_core_inact(jj) res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) - res_l(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + res_l(2) -= -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) enddo do tt = 1, n_act_orb t = list_act(tt) @@ -93,12 +95,14 @@ subroutine gradvec_tc_ia(i,a,res_l, res_r) tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & ) - res_l(2) += -0.5d0 * ( & - tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & - +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & ) enddo enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end subroutine gradvec_tc_it(i,t,res_l, res_r) @@ -116,20 +120,22 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) res_r = 0.d0 res_l = 0.d0 res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) - res_l(1) += 2.D0 * mo_bi_ortho_tc_one_e(t,i) + res_l(1) -= -2.D0 * mo_bi_ortho_tc_one_e(t,i) do rr = 1, n_act_orb r = list_act(rr) res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) - res_l(1) += -mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) + res_l(1) -= mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) enddo do jj = 1, n_core_inact_orb j = list_core_inact(jj) - res_r(2) += 2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_r(2) += -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_l(2) -= -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(t,j,i,j) - mo_bi_ortho_tc_two_e(t,j,j,i)) do rr = 1, n_act_orb r = list_act(rr) res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + res_l(2) -= tc_transition_matrix_mo(r,t,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,j,i,j) - mo_bi_ortho_tc_two_e(j,r,j,i)) enddo enddo do rr = 1, n_act_orb @@ -140,14 +146,21 @@ subroutine gradvec_tc_it(i,t,res_l, res_r) tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,t,u,i) - mo_bi_ortho_tc_two_e(t,r,u,i)) & + + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(t,u,i,r) - mo_bi_ortho_tc_two_e(u,t,i,r)) & + ) do vv = 1, n_act_orb v = list_act(vv) res_r(2) += 0.5d0 * ( & mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + res_l(2) -= 0.5d0 * ( & + mo_bi_ortho_tc_two_e(v,u,i,r) * tc_two_rdm(v,u,t,r) + mo_bi_ortho_tc_two_e(v,u,r,i) * tc_two_rdm(v,u,r,t) ) enddo enddo enddo - + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end subroutine gradvec_tc_ta(t,a,res_l, res_r) @@ -161,14 +174,90 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) END_DOC integer, intent(in) :: t,a double precision, intent(out) :: res_l(0:3),res_r(0:3) - integer :: rr,r,m - double precision :: dm + integer :: rr,r,j,jj,u,uu,v,vv + double precision :: res_r_inact_test, res_r_act_test + double precision :: res_l_inact_test, res_l_act_test res_r = 0.d0 res_l = 0.d0 do rr = 1, n_act_orb r = list_act(rr) res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) - res_r(1) += -mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) + res_r(1) -= mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) enddo + + res_r_inact_test = 0.d0 + res_l_inact_test = 0.d0 + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + do rr = 1, n_act_orb + r = list_act(rr) + res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) + res_l_inact_test += -tc_transition_matrix_mo(t,r,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) + enddo + enddo + res_r_act_test = 0.d0 + res_l_act_test = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + do vv = 1, n_act_orb + v = list_act(vv) + do uu = 1, n_act_orb + u = list_act(uu) + res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & + +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) + res_l_act_test += - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) + enddo + enddo + enddo + res_r_act_test *= 0.5d0 + res_l_act_test *= 0.5d0 + res_r(2) = res_r_inact_test + res_r_act_test + res_l(2) = res_l_inact_test + res_l_act_test + + integer :: m,x,y + double precision :: res_r_inact, res_r_act + if(.False.)then + ! test quantities + res_r_inact = 0.d0 + res_r_act = 0.d0 + do m = 1, mo_num + do x = 1, mo_num + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r_inact += 0.5d0 * mo_bi_ortho_tc_two_e(t,j,m,x) * tc_two_rdm(a,j,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,j,a,x) * tc_two_rdm(m,j,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(j,t,m,x) * tc_two_rdm(j,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,j,m,a) * tc_two_rdm(x,j,m,t) + enddo + do rr = 1, n_act_orb + r = list_act(rr) + res_r_act += 0.5d0 * mo_bi_ortho_tc_two_e(t,r,m,x) * tc_two_rdm(a,r,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,r,a,x) * tc_two_rdm(m,r,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(r,t,m,x) * tc_two_rdm(r,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,r,m,a) * tc_two_rdm(x,r,m,t) + enddo + enddo + enddo + if(dabs(res_r_inact).gt.1.d-12)then + if(dabs(res_r_inact_test - res_r_inact).gt.1.d-10)then + print*,'inact' + print*,'t,a',t,a + print*,res_r_inact_test , res_r_inact, dabs(res_r_inact_test - res_r_inact) + endif + endif + if(dabs(res_r_act).gt.1.d-12)then + if(dabs(res_r_act_test - res_r_act).gt.1.d-10)then + print*,'act' + print*,'t,a',t,a + print*,res_r_act_test , res_r_act, dabs(res_r_act_test - res_r_act) + endif + endif + endif + + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f index e8440513..3f0ffb5e 100644 --- a/src/casscf_tc_bi/grad_old.irp.f +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -39,11 +39,13 @@ enddo enddo +! print*,'old grad' do tt = 1, n_act_orb ihole = list_act(tt) do aa = 1, n_virt_orb ipart = list_virt(aa) indx = mat_idx_a_v(tt,aa) +! print*,indx,tt,aa call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) do ll = 0, 3 gradvec_detail_left_old (ll,indx)=res_l(ll) From bb80a3c2a2ac047c3bd435cc8ed28bb1dc1accee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 25 Aug 2023 09:37:05 +0200 Subject: [PATCH 262/337] Minor fix in trexio import --- scripts/qp_import_trexio.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 2c829f5c..142411a6 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -459,8 +459,9 @@ def write_ezfio(trexio_filename, filename): beta = [ i for i in range(num_beta) ] if trexio.has_mo_spin(trexio_file): spin = trexio.read_mo_spin(trexio_file) - beta = [ i for i in range(mo_num) if spin[i] == 1 ] - beta = [ beta[i] for i in range(num_beta) ] + if max(spin) == 1: + beta = [ i for i in range(mo_num) if spin[i] == 1 ] + beta = [ beta[i] for i in range(num_beta) ] alpha = qp_bitmasks.BitMask(alpha) beta = qp_bitmasks.BitMask(beta ) From 8f6df3428371cffeb0a28b8c16b6da910a5ee7e4 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 29 Aug 2023 14:26:07 +0200 Subject: [PATCH 263/337] minor modif --- src/bi_ortho_mos/bi_density.irp.f | 2 -- src/tc_scf/tc_scf_dm.irp.f | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f index 2dad9485..90fe9634 100644 --- a/src/bi_ortho_mos/bi_density.irp.f +++ b/src/bi_ortho_mos/bi_density.irp.f @@ -15,7 +15,6 @@ BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ] call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & - !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) ) END_PROVIDER @@ -36,7 +35,6 @@ BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ] call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0 & , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) & - !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) & , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) ) END_PROVIDER diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f index 07da8a58..bf31a4a1 100644 --- a/src/tc_scf/tc_scf_dm.irp.f +++ b/src/tc_scf/tc_scf_dm.irp.f @@ -1,46 +1,68 @@ ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ] +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num)] BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for BETA electrons + ! END_DOC + implicit none if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta + else + TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta + endif + END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ] +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num)] BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for ALPHA electrons + ! END_DOC + implicit none if(bi_ortho) then + PROVIDE mo_l_coef mo_r_coef TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha + else + TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha + endif + END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ] - implicit none +BEGIN_PROVIDER [double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num)] + BEGIN_DOC + ! ! TC-SCF transition density matrix on the AO basis for ALPHA+BETA electrons + ! END_DOC + + implicit none + TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha + END_PROVIDER From 53041958a6ccbb18bfacb4bd623887dde33aa71d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 29 Aug 2023 19:21:54 +0200 Subject: [PATCH 264/337] opt in v_ij_u_cst_mu_j1b_an: tested --- src/ao_many_one_e_ints/ao_erf_gauss.irp.f | 154 ++++++++++++++++++ .../grad_lapl_jmu_modif.irp.f | 131 ++++++++++++++- src/non_h_ints_mu/test_non_h_ints.irp.f | 44 ++++- 3 files changed, 317 insertions(+), 12 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index b1077161..823536cc 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -1245,3 +1245,157 @@ end subroutine NAI_pol_x2_mult_erf_ao ! --- +subroutine NAI_pol_012_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! ints(1) = $\int_{-\infty}^{infty} dr x^0 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! ints(2) = $\int_{-\infty}^{infty} dr x^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(3) = $\int_{-\infty}^{infty} dr y^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(4) = $\int_{-\infty}^{infty} dr z^1 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! ints(5) = $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(6) = $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! ints(7) = $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + double precision, intent(out) :: ints(7) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + double precision :: integral0, integral1, integral2 + + double precision, external :: NAI_pol_mult_erf_with1s + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + call NAI_pol_012_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + return + endif + + ints = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + integral0 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(1) += coef * integral0 + + do m = 1, 3 + + power_A1 = power_Ai + power_A1(m) += 1 + integral1 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A1, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(1+m) += coef * (integral1 + Ai_center(m)*integral0) + + power_A2 = power_Ai + power_A2(m) += 2 + integral2 = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_A2, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) + ints(4+m) += coef * (integral2 + Ai_center(m) * (2.d0*integral1 + Ai_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_012_mult_erf_ao_with1s + +! --- + +subroutine NAI_pol_012_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! int(1) = $\int_{-\infty}^{infty} dr x^0 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! int(2) = $\int_{-\infty}^{infty} dr x^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(3) = $\int_{-\infty}^{infty} dr y^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(4) = $\int_{-\infty}^{infty} dr z^1 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! int(5) = $\int_{-\infty}^{infty} dr x^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(6) = $\int_{-\infty}^{infty} dr y^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! int(7) = $\int_{-\infty}^{infty} dr z^2 * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + double precision, intent(out) :: ints(7) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, m + integer :: power_A1(3), power_A2(3) + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision :: integral0, integral1, integral2 + + double precision :: NAI_pol_mult_erf + + ints = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + integral0 = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + ints(1) += coef * integral0 + + do m = 1, 3 + + power_A1 = power_A + power_A1(m) += 1 + integral1 = NAI_pol_mult_erf(A_center, B_center, power_A1, power_B, alpha, beta, C_center, n_pt_in, mu_in) + + ints(1+m) += coef * (integral1 + A_center(m)*integral0) + + power_A2 = power_A + power_A2(m) += 2 + integral2 = NAI_pol_mult_erf(A_center, B_center, power_A2, power_B, alpha, beta, C_center, n_pt_in, mu_in) + + ints(4+m) += coef * (integral2 + A_center(m) * (2.d0*integral1 + A_center(m)*integral0)) + enddo + enddo + enddo + +end subroutine NAI_pol_012_mult_erf_ao + +! --- + diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 9af3f9a9..24b33eb5 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -299,15 +299,12 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [double precision, v_ij_u_cst_mu_j1b_an_old, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) ! - ! TODO - ! one subroutine for all integrals - ! END_DOC include 'constants.include.F' @@ -325,7 +322,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin double precision, external :: overlap_gauss_r12_ao_with1s double precision, external :: NAI_pol_mult_erf_ao_with1s - print*, ' providing v_ij_u_cst_mu_j1b_an ...' + print*, ' providing v_ij_u_cst_mu_j1b_an_old ...' call wall_time(wall0) provide mu_erf final_grid_points j1b_pen @@ -333,7 +330,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin ct = inv_sq_pi_2 / mu_erf - v_ij_u_cst_mu_j1b_an = 0.d0 + v_ij_u_cst_mu_j1b_an_old = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, & @@ -342,7 +339,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & !$OMP final_grid_points, mu_erf, ct, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & - !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an_old) !$OMP DO do ipoint = 1, n_points_final_grid @@ -413,6 +410,125 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin ! --- + v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) = tmp + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) = v_ij_u_cst_mu_j1b_an_old(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for v_ij_u_cst_mu_j1b_an_old', wall1 - wall0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer :: i, j, ipoint, i_1s + double precision :: r(3), r1_2 + double precision :: int_o + double precision :: int_c(7), int_e(7) + double precision :: coef, beta, B_center(3) + double precision :: tmp, ct + double precision :: wall0, wall1 + + double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: NAI_pol_mult_erf_ao_with1s + + print*, ' providing v_ij_u_cst_mu_j1b_an ...' + call wall_time(wall0) + + provide mu_erf final_grid_points j1b_pen + PROVIDE List_all_comb_b2_size List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent + + ct = inv_sq_pi_2 / mu_erf + + v_ij_u_cst_mu_j1b_an = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, & + !$OMP r1_2, tmp, int_c, int_e, int_o) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & + !$OMP final_grid_points, mu_erf, ct, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b_an) + !$OMP DO + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + r1_2 = 0.5d0 * (r(1)*r(1) + r(2)*r(2) + r(3)*r(3)) + + do i = 1, ao_num + do j = i, ao_num + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c) + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = coef & + * ( r1_2 * (int_c(1) - int_e(1)) & + - r(1) * (int_c(2) - int_e(2)) - r(2) * (int_c(3) - int_e(3)) - r(3) * (int_c(4) - int_e(4)) & + + 0.5d0 * (int_c(5) + int_c(6) + int_c(7) - int_e(5) - int_e(6) - int_e(7)) & + - ct * int_o & + ) + + ! --- + + do i_1s = 2, List_all_comb_b2_size + + coef = List_all_comb_b2_coef (i_1s) + if(dabs(coef) .lt. 1d-15) cycle ! beta = 0.0 + beta = List_all_comb_b2_expo (i_1s) + B_center(1) = List_all_comb_b2_cent(1,i_1s) + B_center(2) = List_all_comb_b2_cent(2,i_1s) + B_center(3) = List_all_comb_b2_cent(3,i_1s) + + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, int_c) + call NAI_pol_012_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, int_e) + + int_o = overlap_gauss_r12_ao_with1s(B_center, beta, r, mu_erf*mu_erf, i, j) + + tmp = tmp + coef & + * ( r1_2 * (int_c(1) - int_e(1)) & + - r(1) * (int_c(2) - int_e(2)) - r(2) * (int_c(3) - int_e(3)) - r(3) * (int_c(4) - int_e(4)) & + + 0.5d0 * (int_c(5) + int_c(6) + int_c(7) - int_e(5) - int_e(6) - int_e(7)) & + - ct * int_o & + ) + + enddo + + ! --- + v_ij_u_cst_mu_j1b_an(j,i,ipoint) = tmp enddo enddo @@ -434,4 +550,3 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_an, (ao_num, ao_num, n_poin END_PROVIDER ! --- - diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index aff53c2d..e7718a40 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -14,7 +14,9 @@ program test_non_h !call routine_grad_squared() !call routine_fit() - call test_ipp() + !call test_ipp() + + call test_v_ij_u_cst_mu_j1b_an() end ! --- @@ -545,9 +547,43 @@ end subroutine grad1_aos_ik_grad1_esquare ! --- - - - +subroutine test_v_ij_u_cst_mu_j1b_an() + + implicit none + integer :: i, j, ipoint + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE v_ij_u_cst_mu_j1b_an_old v_ij_u_cst_mu_j1b_an + + thr = 1d-12 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + I_old = v_ij_u_cst_mu_j1b_an_old(j,i,ipoint) + I_new = v_ij_u_cst_mu_j1b_an (j,i,ipoint) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_v_ij_u_cst_mu_j1b_an() From 7670941650011edb515b8c466725d0c59c479554 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 30 Aug 2023 15:21:21 +0200 Subject: [PATCH 265/337] sleep --- src/scf_utils/roothaan_hall_scf.irp.f | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f index cf006035..730cb496 100644 --- a/src/scf_utils/roothaan_hall_scf.irp.f +++ b/src/scf_utils/roothaan_hall_scf.irp.f @@ -199,6 +199,7 @@ END_DOC write(6,*) if (converged) then write(6,*) 'SCF converged' + call sleep(1) ! When too fast, the MOs aren't saved. endif From 5ca864b7b08ca29453cb00dc482ffed77e20b840 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 30 Aug 2023 18:49:09 +0200 Subject: [PATCH 266/337] DGEMM for fock_3e_uhf_mo_cs: OK --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 364 +++++++++++++++++++++++++- src/tc_scf/test_int.irp.f | 46 +++- 2 files changed, 402 insertions(+), 8 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 3e624941..5d663480 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -3,6 +3,356 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2 + double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) + double precision, allocatable :: tmpval_omp(:), tmpvec_omp(:,:), tmpten_omp(:,:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + print *, ' PROVIDING fock_3e_uhf_mo_cs ...' + call wall_time(ti) + + ! --- + + allocate(tmpvec_1(n_points_final_grid,3), tmpval_1(n_points_final_grid)) + tmpvec_1 = 0.d0 + tmpval_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmpval_omp, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmpval_1, tmpvec_1) + + allocate(tmpvec_omp(n_points_final_grid,3), tmpval_omp(n_points_final_grid)) + tmpvec_omp = 0.d0 + tmpval_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpvec_omp(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmpvec_omp(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmpvec_omp(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmpval_omp(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmpvec_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmpvec_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmpvec_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmpval_1(ipoint) += tmpval_omp(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp, tmpval_omp) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,5)) + tmp_1 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, loc_1) & + !$OMP SHARED (n_points_final_grid, tmpval_1, tmpvec_1, tmp_1) + !$OMP DO + do ipoint = 1, n_points_final_grid + + loc_1 = -4.d0 * tmpval_1(ipoint) + + tmp_1(ipoint,1) = loc_1 * tmpvec_1(ipoint,1) + tmp_1(ipoint,2) = loc_1 * tmpvec_1(ipoint,2) + tmp_1(ipoint,3) = loc_1 * tmpvec_1(ipoint,3) + + tmp_1(ipoint,4) = -2.d0 * ( tmpvec_1(ipoint,1) * tmpvec_1(ipoint,1) & + + tmpvec_1(ipoint,2) * tmpvec_1(ipoint,2) & + + tmpvec_1(ipoint,3) * tmpvec_1(ipoint,3) ) + + tmp_1(ipoint,5) = tmpval_1(ipoint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmpvec_omp) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmpvec_omp(n_points_final_grid,4)) + tmpvec_omp = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_omp(ipoint,1) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmpvec_omp(ipoint,2) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmpvec_omp(ipoint,3) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + tmpvec_omp(ipoint,4) += ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmpvec_omp(ipoint,1) + tmp_1(ipoint,2) += tmpvec_omp(ipoint,2) + tmp_1(ipoint,3) += tmpvec_omp(ipoint,3) + tmp_1(ipoint,4) += tmpvec_omp(ipoint,4) + enddo + !$OMP END CRITICAL + + deallocate(tmpvec_omp) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + tmp_2 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + tmp_2(ipoint,4,b,a) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) * mos_r_in_r_array_transp(ipoint,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + + allocate(tmpten_omp(n_points_final_grid,mo_num,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmpten_omp(ipoint,b,a) += 2.d0 * final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,5,b,a) += tmpten_omp(ipoint,b,a) + enddo + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,7,mo_num), tmp_4(n_points_final_grid,7,mo_num)) + tmp_3 = 0.d0 + tmp_4 = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_3(ipoint,2,b) = loc_1 + tmp_3(ipoint,7,b) = loc_1 + + tmp_4(ipoint,1,b) = loc_2 + tmp_4(ipoint,6,b) = loc_2 + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmpvec_1, tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,8,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmpten_omp(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmpten_omp(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmpten_omp(ipoint,4,b) += 2.d0 * loc_1 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmpten_omp(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmpten_omp(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmpten_omp(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmpten_omp(ipoint,8,b) += 2.d0 * loc_2 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,3,b) += tmpten_omp(ipoint,1,b) + tmp_3(ipoint,4,b) += tmpten_omp(ipoint,2,b) + tmp_3(ipoint,5,b) += tmpten_omp(ipoint,3,b) + tmp_3(ipoint,6,b) += tmpten_omp(ipoint,4,b) + + tmp_4(ipoint,3,b) += tmpten_omp(ipoint,5,b) + tmp_4(ipoint,4,b) += tmpten_omp(ipoint,6,b) + tmp_4(ipoint,5,b) += tmpten_omp(ipoint,7,b) + tmp_4(ipoint,7,b) += tmpten_omp(ipoint,8,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, tmpten_omp) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + + allocate(tmpten_omp(n_points_final_grid,2,mo_num)) + tmpten_omp = 0.d0 + + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmpten_omp(ipoint,1,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmpten_omp(ipoint,2,b) -= loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) += tmpten_omp(ipoint,1,b) + tmp_4(ipoint,2,b) += tmpten_omp(ipoint,2,b) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmpten_omp) + + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 7*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 7*n_points_final_grid & + , tmp_4(1,1,1), 7*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + ! --- + + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] + implicit none integer :: a, b, i, j double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia @@ -12,14 +362,14 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - !call wall_time(ti) + print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' + call wall_time(ti) - fock_3e_uhf_mo_cs = 0.d0 + fock_3e_uhf_mo_cs_old = 0.d0 !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs) + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -54,7 +404,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_cs(b,a) += tmp(b,a) + fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -62,8 +412,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti END_PROVIDER diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 649d0f3e..d7780497 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -54,7 +54,10 @@ program test_ints !!PROVIDE TC_HF_energy VARTC_HF_energy !!print *, ' TC_HF_energy = ', TC_HF_energy !!print *, ' VARTC_HF_energy = ', VARTC_HF_energy - call test_old_ints +! call test_old_ints + + call test_fock_3e_uhf_mo_cs() + end ! --- @@ -1096,3 +1099,44 @@ subroutine test_int2_grad1_u12_ao_test print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 end + +! --- + +subroutine test_fock_3e_uhf_mo_cs() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_cs_old(j,i) + I_new = fock_3e_uhf_mo_cs (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_cs + + From 024d9019bb470916f55c8a599208e49e44531ac7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 31 Aug 2023 17:45:12 +0200 Subject: [PATCH 267/337] F(3e) CS optim --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 256 +++++++++----------------- 1 file changed, 92 insertions(+), 164 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 5d663480..ce343f9b 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -6,9 +6,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] implicit none integer :: a, b, i, j, ipoint double precision :: ti, tf - double precision :: loc_1, loc_2 - double precision, allocatable :: tmpval_1(:), tmpvec_1(:,:) - double precision, allocatable :: tmpval_omp(:), tmpvec_omp(:,:), tmpten_omp(:,:,:) + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) @@ -19,96 +19,81 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- - allocate(tmpvec_1(n_points_final_grid,3), tmpval_1(n_points_final_grid)) - tmpvec_1 = 0.d0 - tmpval_1 = 0.d0 + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmpval_omp, tmpvec_omp) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & !$OMP SHARED (n_points_final_grid, elec_beta_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmpval_1, tmpvec_1) + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - allocate(tmpvec_omp(n_points_final_grid,3), tmpval_omp(n_points_final_grid)) - tmpvec_omp = 0.d0 - tmpval_omp = 0.d0 + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 !$OMP DO do i = 1, elec_beta_num do ipoint = 1, n_points_final_grid - tmpvec_omp(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmpvec_omp(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmpvec_omp(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmpval_omp(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) enddo enddo !$OMP END DO NOWAIT !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmpvec_1(ipoint,1) += tmpvec_omp(ipoint,1) - tmpvec_1(ipoint,2) += tmpvec_omp(ipoint,2) - tmpvec_1(ipoint,3) += tmpvec_omp(ipoint,3) - tmpval_1(ipoint) += tmpval_omp(ipoint) + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) enddo !$OMP END CRITICAL - deallocate(tmpvec_omp, tmpval_omp) + deallocate(tmp_omp_d2, tmp_omp_d1) !$OMP END PARALLEL ! --- - allocate(tmp_1(n_points_final_grid,5)) - tmp_1 = 0.d0 + allocate(tmp_1(n_points_final_grid,4)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, loc_1) & - !$OMP SHARED (n_points_final_grid, tmpval_1, tmpvec_1, tmp_1) - !$OMP DO do ipoint = 1, n_points_final_grid - loc_1 = -4.d0 * tmpval_1(ipoint) + loc_1 = 2.d0 * Okappa(ipoint) - tmp_1(ipoint,1) = loc_1 * tmpvec_1(ipoint,1) - tmp_1(ipoint,2) = loc_1 * tmpvec_1(ipoint,2) - tmp_1(ipoint,3) = loc_1 * tmpvec_1(ipoint,3) + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - tmp_1(ipoint,4) = -2.d0 * ( tmpvec_1(ipoint,1) * tmpvec_1(ipoint,1) & - + tmpvec_1(ipoint,2) * tmpvec_1(ipoint,2) & - + tmpvec_1(ipoint,3) * tmpvec_1(ipoint,3) ) - - tmp_1(ipoint,5) = tmpval_1(ipoint) + tmp_1(ipoint,4) = Okappa(ipoint) enddo - !$OMP END DO - !$OMP END PARALLEL !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmpvec_omp) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & !$OMP SHARED (n_points_final_grid, elec_beta_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, tmp_1) - allocate(tmpvec_omp(n_points_final_grid,4)) - tmpvec_omp = 0.d0 + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 - !$OMP DO + !$OMP DO COLLAPSE(2) do i = 1, elec_beta_num do j = 1, elec_beta_num do ipoint = 1, n_points_final_grid loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_omp(ipoint,1) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmpvec_omp(ipoint,2) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmpvec_omp(ipoint,3) += 2.d0 * loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - tmpvec_omp(ipoint,4) += ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) enddo enddo enddo @@ -116,19 +101,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmpvec_omp(ipoint,1) - tmp_1(ipoint,2) += tmpvec_omp(ipoint,2) - tmp_1(ipoint,3) += tmpvec_omp(ipoint,3) - tmp_1(ipoint,4) += tmpvec_omp(ipoint,4) + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) enddo !$OMP END CRITICAL - deallocate(tmpvec_omp) + deallocate(tmp_omp_d2) !$OMP END PARALLEL ! --- - allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) tmp_2 = 0.d0 !$OMP PARALLEL & @@ -138,14 +122,13 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmp_2) - !$OMP DO + !$OMP DO COLLAPSE(2) do a = 1, mo_num do b = 1, mo_num do ipoint = 1, n_points_final_grid tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - tmp_2(ipoint,4,b,a) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) * mos_r_in_r_array_transp(ipoint,a) enddo enddo enddo @@ -154,75 +137,56 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i, tmpten_omp) & + !$OMP PRIVATE (ipoint, a, b, i) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP tmp_2) - - allocate(tmpten_omp(n_points_final_grid,mo_num,mo_num)) - tmpten_omp = 0.d0 - - !$OMP DO + !$OMP DO COLLAPSE(2) do a = 1, mo_num do b = 1, mo_num + tmp_2(:,4,b,a) = 0.d0 do i = 1, elec_beta_num do ipoint = 1, n_points_final_grid - tmpten_omp(ipoint,b,a) += 2.d0 * final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) enddo enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,5,b,a) += tmpten_omp(ipoint,b,a) - enddo - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL ! --- - - call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) deallocate(tmp_1, tmp_2) ! --- - allocate(tmp_3(n_points_final_grid,7,mo_num), tmp_4(n_points_final_grid,7,mo_num)) - tmp_3 = 0.d0 - tmp_4 = 0.d0 + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & !$OMP SHARED (n_points_final_grid, mo_num, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, tmp_3, tmp_4) + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) !$OMP DO do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - - tmp_3(ipoint,2,b) = loc_1 - tmp_3(ipoint,7,b) = loc_1 - - tmp_4(ipoint,1,b) = loc_2 - tmp_4(ipoint,6,b) = loc_2 + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) enddo enddo !$OMP END DO @@ -230,15 +194,11 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, tmpten_omp) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmpvec_1, tmp_3, tmp_4) - - allocate(tmpten_omp(n_points_final_grid,8,mo_num)) - tmpten_omp = 0.d0 - + !$OMP Jkappa, tmp_3, tmp_4) !$OMP DO do b = 1, mo_num do i = 1, elec_beta_num @@ -247,57 +207,32 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) loc_2 = mos_r_in_r_array_transp(ipoint,i) - tmpten_omp(ipoint,1,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmpten_omp(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmpten_omp(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmpten_omp(ipoint,4,b) += 2.d0 * loc_1 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmpten_omp(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmpten_omp(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmpten_omp(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmpten_omp(ipoint,8,b) += 2.d0 * loc_2 * ( tmpvec_1(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + tmpvec_1(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + tmpvec_1(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,3,b) += tmpten_omp(ipoint,1,b) - tmp_3(ipoint,4,b) += tmpten_omp(ipoint,2,b) - tmp_3(ipoint,5,b) += tmpten_omp(ipoint,3,b) - tmp_3(ipoint,6,b) += tmpten_omp(ipoint,4,b) - - tmp_4(ipoint,3,b) += tmpten_omp(ipoint,5,b) - tmp_4(ipoint,4,b) += tmpten_omp(ipoint,6,b) - tmp_4(ipoint,5,b) += tmpten_omp(ipoint,7,b) - tmp_4(ipoint,7,b) += tmpten_omp(ipoint,8,b) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL - - !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, tmpten_omp) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP tmp_3, tmp_4) - - allocate(tmpten_omp(n_points_final_grid,2,mo_num)) - tmpten_omp = 0.d0 - !$OMP DO do b = 1, mo_num do i = 1, elec_beta_num @@ -305,42 +240,35 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] do ipoint = 1, n_points_final_grid loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) - tmpten_omp(ipoint,1,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - tmpten_omp(ipoint,2,b) -= loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) enddo enddo enddo enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) += tmpten_omp(ipoint,1,b) - tmp_4(ipoint,2,b) += tmpten_omp(ipoint,2,b) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmpten_omp) - + !$OMP END DO !$OMP END PARALLEL ! --- - call dgemm( 'T', 'N', mo_num, mo_num, 7*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 7*n_points_final_grid & - , tmp_4(1,1,1), 7*n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) ! --- From e0a11ad21db49d19e8395e650e0db8c6189096a7 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 1 Sep 2023 09:34:54 +0200 Subject: [PATCH 268/337] alpha part: 1st and 2nd terms of OS --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 453 +++++++++++++++++++++++++- src/tc_scf/fock_tc.irp.f | 26 -- src/tc_scf/fock_three_bi_ortho.irp.f | 2 +- src/tc_scf/test_int.irp.f | 89 ++++- 4 files changed, 525 insertions(+), 45 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index ce343f9b..3a68ffc6 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -113,7 +113,6 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] ! --- allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - tmp_2 = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -347,7 +346,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] BEGIN_DOC ! @@ -366,17 +365,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef PROVIDE fock_3e_uhf_mo_cs - !print *, ' Providing fock_3e_uhf_mo_a ...' - !call wall_time(ti) + print *, ' Providing fock_3e_uhf_mo_a_old ...' + call wall_time(ti) o = elec_beta_num + 1 call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -460,7 +460,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_a(b,a) += tmp(b,a) + fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -468,19 +468,21 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + call wall_time(tf) + print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] BEGIN_DOC + ! ! BETA part of the Fock matrix from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used + ! END_DOC implicit none @@ -491,17 +493,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] PROVIDE mo_l_coef mo_r_coef - !print *, ' PROVIDING fock_3e_uhf_mo_b ...' - !call wall_time(ti) + print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' + call wall_time(ti) o = elec_beta_num + 1 call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b) + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) allocate(tmp(mo_num,mo_num)) tmp = 0.d0 @@ -575,7 +578,7 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] !$OMP CRITICAL do a = 1, mo_num do b = 1, mo_num - fock_3e_uhf_mo_b(b,a) += tmp(b,a) + fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) enddo enddo !$OMP END CRITICAL @@ -583,8 +586,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] deallocate(tmp) !$OMP END PARALLEL - !call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti + call wall_time(tf) + print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti END_PROVIDER @@ -760,5 +763,421 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] END_PROVIDER +! --- + + BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs + + print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,5)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + + tmp_1(ipoint,1) = loc_1 * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + tmp_1(ipoint,5) = -loc_1 + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + + tmp_2(:,5,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,5,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_uhf_mo_a(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.d0 * mos_r_in_r_array_transp(ipoint,b) + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jbarkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jbarkappa(ipoint,3) * Jkappa(ipoint,3) ) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,8,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,8,b) += loc_3 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += loc_4 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_a(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + call wall_time(tf) + print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + +END_PROVIDER + ! --- diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 0ae515bb..fcca29ac 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -190,30 +190,14 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_alpha - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_a - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1)) - !deallocate(tmp) - PROVIDE mo_l_coef mo_r_coef - !call wall_time(tt0) call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) - !call wall_time(tt1) - !print*, ' 2-e term:', tt1-tt0 if(three_body_h_tc) then - !call wall_time(tt0) - !PROVIDE fock_a_tot_3e_bi_orth - !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth PROVIDE fock_3e_uhf_mo_a Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a - !call wall_time(tt1) - !print*, ' 3-e term:', tt1-tt0 endif else @@ -241,19 +225,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] if(bi_ortho) then - !allocate(tmp(ao_num,ao_num)) - !tmp = Fock_matrix_tc_ao_beta - !if(three_body_h_tc) then - ! tmp += fock_3e_uhf_ao_b - !endif - !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1)) - !deallocate(tmp) - call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then - !PROVIDE fock_b_tot_3e_bi_orth - !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth PROVIDE fock_3e_uhf_mo_b Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index 5d2f199c..8475c387 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -34,7 +34,7 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] enddo !call wall_time(t1) - !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1-t0 + !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 END_PROVIDER diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index d7780497..b925c9df 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -57,6 +57,7 @@ program test_ints ! call test_old_ints call test_fock_3e_uhf_mo_cs() + call test_fock_3e_uhf_mo_a() end @@ -1109,6 +1110,13 @@ subroutine test_fock_3e_uhf_mo_cs() double precision :: I_old, I_new double precision :: diff_tot, diff, thr_ih, norm +! double precision :: t0, t1 +! print*, ' Providing fock_a_tot_3e_bi_orth ...' +! call wall_time(t0) +! PROVIDE fock_a_tot_3e_bi_orth +! call wall_time(t1) +! print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1 - t0 + PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old thr_ih = 1d-10 @@ -1123,7 +1131,7 @@ subroutine test_fock_3e_uhf_mo_cs() diff = dabs(I_old - I_new) if(diff .gt. thr_ih) then - print *, ' problem on ', j, i + print *, ' problem in fock_3e_uhf_mo_cs on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new stop @@ -1139,4 +1147,83 @@ subroutine test_fock_3e_uhf_mo_cs() return end subroutine test_fock_3e_uhf_mo_cs +! --- + +subroutine test_fock_3e_uhf_mo_a() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_a_old(j,i) + I_new = fock_3e_uhf_mo_a (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem in fock_3e_uhf_mo_a on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_a + +! --- + +subroutine test_fock_3e_uhf_mo_b() + + implicit none + integer :: i, j + double precision :: I_old, I_new + double precision :: diff_tot, diff, thr_ih, norm + + PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old + + thr_ih = 1d-10 + norm = 0.d0 + diff_tot = 0.d0 + + do i = 1, mo_num + do j = 1, mo_num + + I_old = fock_3e_uhf_mo_b_old(j,i) + I_new = fock_3e_uhf_mo_b (j,i) + + diff = dabs(I_old - I_new) + if(diff .gt. thr_ih) then + print *, ' problem in fock_3e_uhf_mo_b on ', j, i + print *, ' old value = ', I_old + print *, ' new value = ', I_new + stop + endif + + norm += dabs(I_old) + diff_tot += diff + enddo + enddo + + print *, ' diff tot (%) = ', 100.d0 * diff_tot / norm + + return +end subroutine test_fock_3e_uhf_mo_b + +! --- From 8f0f4d3135c0c709d92012800e941930eb9de852 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Fri, 1 Sep 2023 10:25:12 +0200 Subject: [PATCH 269/337] alpha part: full OS --- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 148 +++++++++++++------------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index 3a68ffc6..bb3025f3 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -777,11 +777,9 @@ END_PROVIDER END_DOC implicit none - integer :: a, b, i, j, ipoint, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + integer :: a, b, i, j, ipoint double precision :: loc_1, loc_2, loc_3, loc_4 double precision :: ti, tf - double precision, allocatable :: tmp(:,:) double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) @@ -793,9 +791,6 @@ END_PROVIDER print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' call wall_time(ti) - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs @@ -871,13 +866,14 @@ END_PROVIDER loc_1 = -2.d0 * Okappa (ipoint) loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) - tmp_1(ipoint,1) = loc_1 * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) tmp_1(ipoint,4) = Obarkappa(ipoint) - tmp_1(ipoint,5) = -loc_1 + tmp_1(ipoint,5) = loc_3 - loc_1 enddo @@ -889,8 +885,8 @@ END_PROVIDER !$OMP int2_grad1_u12_bimo_t, tmp_1) allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 + tmp_omp_d2 = 0.d0 !$OMP DO COLLAPSE(2) do i = 1, elec_beta_num do j = elec_beta_num+1, elec_alpha_num @@ -906,12 +902,34 @@ END_PROVIDER enddo enddo !$OMP END DO NOWAIT - !$OMP CRITICAL do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) enddo !$OMP END CRITICAL @@ -1001,10 +1019,11 @@ END_PROVIDER tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - loc_1 = -2.d0 * mos_r_in_r_array_transp(ipoint,b) - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jbarkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jbarkappa(ipoint,3) * Jkappa(ipoint,3) ) + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) enddo @@ -1055,16 +1074,22 @@ END_PROVIDER tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,8,b) += loc_3 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += loc_4 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) enddo enddo enddo @@ -1080,6 +1105,7 @@ END_PROVIDER !$OMP tmp_3, tmp_4) !$OMP DO do b = 1, mo_num + do i = 1, elec_beta_num do j = elec_beta_num+1, elec_alpha_num do ipoint = 1, n_points_final_grid @@ -1095,6 +1121,7 @@ END_PROVIDER tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) @@ -1112,6 +1139,29 @@ END_PROVIDER enddo enddo enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo enddo !$OMP END DO !$OMP END PARALLEL @@ -1126,54 +1176,6 @@ END_PROVIDER deallocate(tmp_3, tmp_4) deallocate(Jkappa, Okappa) - ! --- - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - call wall_time(tf) print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti From ff9a57c978d772723470d4ebd131707310154a52 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 1 Sep 2023 11:35:28 +0200 Subject: [PATCH 270/337] added some stuffs for TC-CASSCF --- external/ezfio | 2 +- external/irpf90 | 2 +- src/casscf_tc_bi/det_manip.irp.f | 125 +++++++++++++ src/casscf_tc_bi/grad_dm.irp.f | 4 +- src/casscf_tc_bi/test_tc_casscf.irp.f | 252 ++++++++++++++++++++++++++ src/cisd/lccsd_prov.irp.f | 8 +- 6 files changed, 387 insertions(+), 6 deletions(-) create mode 100644 src/casscf_tc_bi/det_manip.irp.f create mode 100644 src/casscf_tc_bi/test_tc_casscf.irp.f diff --git a/external/ezfio b/external/ezfio index d5805497..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/src/casscf_tc_bi/det_manip.irp.f b/src/casscf_tc_bi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_tc_bi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f index be19e6f0..047b5718 100644 --- a/src/casscf_tc_bi/grad_dm.irp.f +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -193,7 +193,7 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) r = list_act(rr) res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) - res_l_inact_test += -tc_transition_matrix_mo(t,r,1,1) * & + res_l_inact_test -= -tc_transition_matrix_mo(t,r,1,1) * & (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) enddo enddo @@ -207,7 +207,7 @@ subroutine gradvec_tc_ta(t,a,res_l, res_r) u = list_act(uu) res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) - res_l_act_test += - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + res_l_act_test -= - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) enddo enddo diff --git a/src/casscf_tc_bi/test_tc_casscf.irp.f b/src/casscf_tc_bi/test_tc_casscf.irp.f new file mode 100644 index 00000000..baa50c0f --- /dev/null +++ b/src/casscf_tc_bi/test_tc_casscf.irp.f @@ -0,0 +1,252 @@ +program tc_bi_ortho + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + +! call routine_i_h_psi +! call routine_grad + call routine_grad_num_dm_one_body +end + +subroutine routine_i_h_psi + implicit none + integer :: i,j + double precision :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states) + double precision :: hmono, htwoe, hthree, htot + double precision :: accu_l_hmono, accu_l_htwoe, accu_l_hthree, accu_l_htot + double precision :: accu_r_hmono, accu_r_htwoe, accu_r_hthree, accu_r_htot + double precision :: test_l_hmono, test_l_htwoe, test_l_hthree, test_l_htot + double precision :: test_r_hmono, test_r_htwoe, test_r_hthree, test_r_htot + + test_l_hmono = 0.d0 + test_l_htwoe = 0.d0 + test_l_hthree= 0.d0 + test_l_htot = 0.d0 + test_r_hmono = 0.d0 + test_r_htwoe = 0.d0 + test_r_hthree= 0.d0 + test_r_htot = 0.d0 + + do i = 1, N_det + call i_H_tc_psi_phi(psi_det(1,1,i),psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,& + N_int,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + accu_l_hmono = 0.d0 + accu_l_htwoe = 0.d0 + accu_l_hthree= 0.d0 + accu_l_htot = 0.d0 + accu_r_hmono = 0.d0 + accu_r_htwoe = 0.d0 + accu_r_hthree= 0.d0 + accu_r_htot = 0.d0 + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + accu_l_hmono += psi_l_coef_bi_ortho(j,1) * hmono + accu_l_htwoe += psi_l_coef_bi_ortho(j,1) * htwoe + accu_l_hthree += psi_l_coef_bi_ortho(j,1) * hthree + accu_l_htot += psi_l_coef_bi_ortho(j,1) * htot + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + accu_r_hmono += psi_r_coef_bi_ortho(j,1) * hmono + accu_r_htwoe += psi_r_coef_bi_ortho(j,1) * htwoe + accu_r_hthree += psi_r_coef_bi_ortho(j,1) * hthree + accu_r_htot += psi_r_coef_bi_ortho(j,1) * htot + enddo + test_l_htot += dabs(i_H_chi_array(0,1)-accu_l_htot) + test_l_hmono += dabs(i_H_chi_array(1,1)-accu_l_hmono) + test_l_htwoe += dabs(i_H_chi_array(2,1)-accu_l_htwoe) + test_l_hthree += dabs(i_H_chi_array(3,1)-accu_l_hthree) + + test_r_htot += dabs(i_H_phi_array(0,1)-accu_r_htot) + test_r_hmono += dabs(i_H_phi_array(1,1)-accu_r_hmono) + test_r_htwoe += dabs(i_H_phi_array(2,1)-accu_r_htwoe) + test_r_hthree += dabs(i_H_phi_array(3,1)-accu_r_hthree) + + enddo + + test_l_htot *= 1.D0/dble(N_det) + test_l_hmono *= 1.D0/dble(N_det) + test_l_htwoe *= 1.D0/dble(N_det) + test_l_hthree *= 1.D0/dble(N_det) + + test_r_htot *= 1.D0/dble(N_det) + test_r_hmono *= 1.D0/dble(N_det) + test_r_htwoe *= 1.D0/dble(N_det) + test_r_hthree *= 1.D0/dble(N_det) + + print*,'**************************' + print*,'test_l_htot = ',test_l_htot + print*,'test_l_hmono = ',test_l_hmono + print*,'test_l_htwoe = ',test_l_htwoe + print*,'test_l_hthree = ',test_l_hthree + print*,'**************************' + print*,'test_r_htot = ',test_r_htot + print*,'test_r_hmono = ',test_r_hmono + print*,'test_r_htwoe = ',test_r_htwoe + print*,'test_r_hthree = ',test_r_hthree + +end + +subroutine routine_grad_num + implicit none + integer :: indx,ihole,ipart + integer :: p,q + double precision :: accu_l, accu_r + double precision :: contrib_l, contrib_r + + accu_l = 0.d0 + accu_r = 0.d0 + do indx=1,nMonoEx + q = excit(1,indx) + p = excit(2,indx) + contrib_l = dabs(dabs(gradvec_detail_left_old(0,indx)) - 2.D0 * dabs( Fock_matrix_tc_mo_tot(q,p))) + contrib_r = dabs(dabs(gradvec_detail_right_old(0,indx)) -2.D0 * dabs( Fock_matrix_tc_mo_tot(p,q))) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,indx,q,p + print*,gradvec_detail_left_old(0,indx),gradvec_detail_right_old(0,indx) + print*,2.D0* Fock_matrix_tc_mo_tot(q,p), 2.d0* Fock_matrix_tc_mo_tot(p,q) + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + +! do i = 1, nMonoEx +! print*,i,gradvec_old(i) +! enddo + +end + +subroutine routine_grad_num_dm_one_body + implicit none + integer :: indx,ii,i,a,aa,tt,t,ibody + double precision :: accu_l, accu_r,ref_r, new_r, ref_l, new_l + double precision :: contrib_l, contrib_r + double precision :: res_l(0:3),res_r(0:3) + + ibody = 2 ! check only the two-body term + provide gradvec_detail_left_old gradvec_tc_l + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->virtual' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do aa = 1, n_virt_orb + indx = mat_idx_c_v(ii,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + i = list_core_inact(ii) + a = list_virt(aa) +! if(i==1.and.a==9)then +! print*,i,a,ref_r, new_r +! stop +! endif + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + print*,indx,i,a,ii,aa + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,gradvec_detail_left_old(0,indx),gradvec_tc_l(0,indx) + print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + print*,'**************************' + print*,'**************************' + endif + + ibody = 2 ! check only the two-body term + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->active' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do tt = 1, n_act_orb + indx = mat_idx_c_a(ii,tt) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + i = list_core_inact(ii) + t = list_act(tt) + print*,indx,i,t + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing active-->virtual ' + accu_l = 0.d0 + accu_r = 0.d0 + do tt = 1, n_act_orb + do aa = 1, n_virt_orb + indx = mat_idx_a_v(tt,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + a = list_virt(aa) + t = list_act(tt) + print*,indx,t,a + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r +! print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f index 38149ac9..8338cf81 100644 --- a/src/cisd/lccsd_prov.irp.f +++ b/src/cisd/lccsd_prov.irp.f @@ -3,7 +3,7 @@ implicit none double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) double precision :: ebefore, eafter, ecorr, thresh - integer :: i,it + integer :: i,it,degree logical :: converged external H_u_0_nstates_openmp allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) @@ -31,7 +31,11 @@ print*,'ecorr = ',ecorr Dress_jj(1) = 0.d0 do i = 2, N_det - if(ecorr + H_jj(i) .gt. H_jj(1))then + if(ecorr + H_jj(i) .lt. H_jj(1))then + print*,'Warning, some dets are not dressed: ' + call get_excitation_degree(ref_bitmask,psi_det(1,1,i),degree,N_int) + print*,'degree, Delta E, coef', degree, H_jj(i)-H_jj(1), u_in(i,1)/u_in(1,1) + else Dress_jj(i) = ecorr endif enddo From 7d04a650b7f417ef6deeec34f005029e61eb7bd0 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 2 Sep 2023 16:47:24 +0200 Subject: [PATCH 271/337] CS & OS of alpha & beta --- src/tc_scf/fock_3e_bi_ortho_cs.irp.f | 280 +++++ src/tc_scf/fock_3e_bi_ortho_os.irp.f | 536 ++++++++++ src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 1168 +-------------------- src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f | 490 +++++++++ src/tc_scf/test_int.irp.f | 1 + 5 files changed, 1320 insertions(+), 1155 deletions(-) create mode 100644 src/tc_scf/fock_3e_bi_ortho_cs.irp.f create mode 100644 src/tc_scf/fock_3e_bi_ortho_os.irp.f create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f diff --git a/src/tc_scf/fock_3e_bi_ortho_cs.irp.f b/src/tc_scf/fock_3e_bi_ortho_cs.irp.f new file mode 100644 index 00000000..0b883865 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_cs.irp.f @@ -0,0 +1,280 @@ + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j, ipoint + double precision :: ti, tf + double precision :: loc_1, loc_2, loc_3 + double precision, allocatable :: Okappa(:), Jkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' + !call wall_time(ti) + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = 2.d0 * Okappa(ipoint) + + tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Okappa(ipoint) + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + tmp_omp_d2 = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & + + Jkappa(ipoint,2) * Jkappa(ipoint,2) & + + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) + tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & + - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 5*n_points_final_grid & + , tmp_4(1,1,1), 5*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + ! --- + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/fock_3e_bi_ortho_os.irp.f b/src/tc_scf/fock_3e_bi_ortho_os.irp.f new file mode 100644 index 00000000..4bbce720 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_os.irp.f @@ -0,0 +1,536 @@ + +! --- + + BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_os, (mo_num, mo_num)] +&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_os, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Open Shell part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, ipoint + double precision :: loc_1, loc_2, loc_3, loc_4 + double precision :: ti, tf + double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) + double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) + double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) + double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' Providing fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os ...' + !call wall_time(ti) + + ! --- + + allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) + allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) + Jkappa = 0.d0 + Okappa = 0.d0 + Jbarkappa = 0.d0 + Obarkappa = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + + allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Okappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + tmp_omp_d1 = 0.d0 + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) + tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) + Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) + Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) + Obarkappa(ipoint) += tmp_omp_d1(ipoint) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2, tmp_omp_d1) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_1(n_points_final_grid,4)) + + do ipoint = 1, n_points_final_grid + + loc_1 = -2.d0 * Okappa (ipoint) + loc_2 = -2.d0 * Obarkappa(ipoint) + loc_3 = Obarkappa(ipoint) + + tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) + tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) + tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) + + tmp_1(ipoint,4) = Obarkappa(ipoint) + enddo + + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & + !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_1) + + allocate(tmp_omp_d2(n_points_final_grid,3)) + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + tmp_omp_d2 = 0.d0 + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP CRITICAL + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) + tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) + tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) + enddo + !$OMP END CRITICAL + + deallocate(tmp_omp_d2) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) + tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) + tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,4,b,a) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 0.d0, fock_3e_uhf_mo_b_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,2,mo_num), tmp_4(n_points_final_grid,2,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & + + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & + + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) + + tmp_4(ipoint,2,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 2*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 2*n_points_final_grid & + , tmp_4(1,1,1), 2*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_b_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + + + + + ! --- + + fock_3e_uhf_mo_a_os = fock_3e_uhf_mo_b_os + + allocate(tmp_1(n_points_final_grid,1)) + + do ipoint = 1, n_points_final_grid + tmp_1(ipoint,1) = Obarkappa(ipoint) + 2.d0 * Okappa(ipoint) + enddo + + allocate(tmp_2(n_points_final_grid,1,mo_num,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, a, b, i) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP tmp_2) + !$OMP DO COLLAPSE(2) + do a = 1, mo_num + do b = 1, mo_num + + tmp_2(:,1,b,a) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_2(ipoint,1,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', n_points_final_grid, mo_num*mo_num, 1.d0 & + , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & + , tmp_1(1,1), 1 & + , 1.d0, fock_3e_uhf_mo_a_os(1,1), 1) + + deallocate(tmp_1, tmp_2) + + ! --- + + allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b) & + !$OMP SHARED (n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + tmp_3(:,:,b) = 0.d0 + tmp_4(:,:,b) = 0.d0 + do ipoint = 1, n_points_final_grid + + tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) + + tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_2 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = 2.d0 * loc_1 + loc_2 = mos_r_in_r_array_transp(ipoint,i) + loc_4 = 2.d0 * loc_2 + + tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) + tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) + tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) + + tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) + + tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + + tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + + tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) + tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) + tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & + !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP tmp_3, tmp_4) + !$OMP DO + do b = 1, mo_num + + do i = 1, elec_beta_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) + loc_3 = mos_r_in_r_array_transp(ipoint,j) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) + loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) + loc_3 = mos_r_in_r_array_transp(ipoint,i) + + tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp_3(1,1,1), 8*n_points_final_grid & + , tmp_4(1,1,1), 8*n_points_final_grid & + , 1.d0, fock_3e_uhf_mo_a_os(1,1), mo_num) + + deallocate(tmp_3, tmp_4) + deallocate(Jkappa, Okappa) + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a_os and fock_3e_uhf_mo_b_os =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index bb3025f3..baa73c8f 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -1,768 +1,4 @@ -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j, ipoint - double precision :: ti, tf - double precision :: loc_1, loc_2, loc_3 - double precision, allocatable :: Okappa(:), Jkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) - - PROVIDE mo_l_coef mo_r_coef - - print *, ' PROVIDING fock_3e_uhf_mo_cs ...' - call wall_time(ti) - - ! --- - - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa) - - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,4)) - - do ipoint = 1, n_points_final_grid - - loc_1 = 2.d0 * Okappa(ipoint) - - tmp_1(ipoint,1) = loc_1 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = loc_1 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = loc_1 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Okappa(ipoint) - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - tmp_omp_d2 = 0.d0 - - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,4,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) -= final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, -2.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 0.d0, fock_3e_uhf_mo_cs(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,5,mo_num), tmp_4(n_points_final_grid,5,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = -2.d0 * mos_r_in_r_array_transp(ipoint,b) * ( Jkappa(ipoint,1) * Jkappa(ipoint,1) & - + Jkappa(ipoint,2) * Jkappa(ipoint,2) & - + Jkappa(ipoint,3) * Jkappa(ipoint,3) ) - tmp_4(ipoint,5,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,5,b) += 2.d0 * loc_1 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += 2.d0 * loc_2 * ( Jkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - do i = 1, elec_beta_num - do j = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,5,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += ( loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) & - - loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 5*n_points_final_grid & - , tmp_4(1,1,1), 5*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_cs(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - ! --- - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] - - implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' - call wall_time(ti) - - fock_3e_uhf_mo_cs_old = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - do j = 1, elec_beta_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! ALPHA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - - print *, ' Providing fock_3e_uhf_mo_a_old ...' - call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs - fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] - - BEGIN_DOC - ! - ! BETA part of the Fock matrix from three-electron terms - ! - ! WARNING :: non hermitian if bi-ortho MOS used - ! - END_DOC - - implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf - double precision, allocatable :: tmp(:,:) - - PROVIDE mo_l_coef mo_r_coef - - print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' - call wall_time(ti) - - o = elec_beta_num + 1 - call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) - - PROVIDE fock_3e_uhf_mo_cs_old - fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & - !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) - - allocate(tmp(mo_num,mo_num)) - tmp = 0.d0 - - !$OMP DO - do a = 1, mo_num - do b = 1, mo_num - - ! --- - - do j = o, elec_alpha_num - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) - - enddo - enddo - - ! --- - - do j = 1, elec_beta_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) - - enddo - enddo - - ! --- - - do j = o, elec_alpha_num - do i = o, elec_alpha_num - - call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) - call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) - call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) - call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) - call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) - call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - - tmp(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) - - enddo - enddo - - ! --- - - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do a = 1, mo_num - do b = 1, mo_num - fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) - enddo - enddo - !$OMP END CRITICAL - - deallocate(tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - print *, ' PROVIDING fock_3e_uhf_ao_a ...' - call wall_time(ti) - - fock_3e_uhf_ao_a = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_a * dm_dk_a * i_mugd_eknu & - + dm_ge_a * dm_dk_a * i_mugd_knue & - - dm_ge_a * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_a * i_mugd_kenu & - - dm_ge_a * dm_dk_a * i_mugd_nuke & - - dm_ge_b * dm_dk_b * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] - - BEGIN_DOC - ! - ! Equations (B6) and (B7) - ! - ! g <--> gamma - ! d <--> delta - ! e <--> eta - ! k <--> kappa - ! - END_DOC - - implicit none - integer :: g, d, e, k, mu, nu - double precision :: dm_ge_a, dm_ge_b, dm_ge - double precision :: dm_dk_a, dm_dk_b, dm_dk - double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu - double precision :: ti, tf - double precision, allocatable :: f_tmp(:,:) - - print *, ' PROVIDING fock_3e_uhf_ao_b ...' - call wall_time(ti) - - fock_3e_uhf_ao_b = 0.d0 - - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) - - allocate(f_tmp(ao_num,ao_num)) - f_tmp = 0.d0 - - !$OMP DO - do g = 1, ao_num - do e = 1, ao_num - dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) - dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) - dm_ge = dm_ge_a + dm_ge_b - do d = 1, ao_num - do k = 1, ao_num - dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) - dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) - dm_dk = dm_dk_a + dm_dk_b - do mu = 1, ao_num - do nu = 1, ao_num - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) - call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) - call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) - call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) - f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & - + dm_ge_b * dm_dk_b * i_mugd_eknu & - + dm_ge_b * dm_dk_b * i_mugd_knue & - - dm_ge_b * dm_dk * i_mugd_enuk & - - dm_ge * dm_dk_b * i_mugd_kenu & - - dm_ge_b * dm_dk_b * i_mugd_nuke & - - dm_ge_a * dm_dk_a * i_mugd_nuke ) - enddo - enddo - enddo - enddo - enddo - enddo - !$OMP END DO NOWAIT - - !$OMP CRITICAL - do mu = 1, ao_num - do nu = 1, ao_num - fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) - enddo - enddo - !$OMP END CRITICAL - - deallocate(f_tmp) - !$OMP END PARALLEL - - call wall_time(tf) - print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti - -END_PROVIDER - ! --- BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] @@ -770,414 +6,36 @@ END_PROVIDER BEGIN_DOC ! - ! Open Shell part of the Fock matrix from three-electron terms + ! Fock matrix from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used ! END_DOC implicit none - integer :: a, b, i, j, ipoint - double precision :: loc_1, loc_2, loc_3, loc_4 - double precision :: ti, tf - double precision, allocatable :: Okappa(:), Jkappa(:,:), Obarkappa(:), Jbarkappa(:,:) - double precision, allocatable :: tmp_omp_d1(:), tmp_omp_d2(:,:) - double precision, allocatable :: tmp_1(:,:), tmp_2(:,:,:,:) - double precision, allocatable :: tmp_3(:,:,:), tmp_4(:,:,:) + double precision :: ti, tf PROVIDE mo_l_coef mo_r_coef - PROVIDE fock_3e_uhf_mo_cs - print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' - call wall_time(ti) + !print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + !call wall_time(ti) + ! CLOSED-SHELL PART PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs - ! --- + if(elec_alpha_num .ne. elec_beta_num) then - allocate(Jkappa(n_points_final_grid,3), Okappa(n_points_final_grid)) - allocate(Jbarkappa(n_points_final_grid,3), Obarkappa(n_points_final_grid)) - Jkappa = 0.d0 - Okappa = 0.d0 - Jbarkappa = 0.d0 - Obarkappa = 0.d0 + ! OPEN-SHELL PART + PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_b_os - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, tmp_omp_d1, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, Okappa, Jkappa, Obarkappa, Jbarkappa) + fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os + fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os + endif - allocate(tmp_omp_d2(n_points_final_grid,3), tmp_omp_d1(n_points_final_grid)) - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Okappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - tmp_omp_d1 = 0.d0 - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_omp_d2(ipoint,1) += int2_grad1_u12_bimo_t(ipoint,1,i,i) - tmp_omp_d2(ipoint,2) += int2_grad1_u12_bimo_t(ipoint,2,i,i) - tmp_omp_d2(ipoint,3) += int2_grad1_u12_bimo_t(ipoint,3,i,i) - tmp_omp_d1(ipoint) += mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - Jbarkappa(ipoint,1) += tmp_omp_d2(ipoint,1) - Jbarkappa(ipoint,2) += tmp_omp_d2(ipoint,2) - Jbarkappa(ipoint,3) += tmp_omp_d2(ipoint,3) - Obarkappa(ipoint) += tmp_omp_d1(ipoint) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2, tmp_omp_d1) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_1(n_points_final_grid,5)) - - do ipoint = 1, n_points_final_grid - - loc_1 = -2.d0 * Okappa (ipoint) - loc_2 = -2.d0 * Obarkappa(ipoint) - loc_3 = Obarkappa(ipoint) - - tmp_1(ipoint,1) = (loc_1 - loc_3) * Jbarkappa(ipoint,1) + loc_2 * Jkappa(ipoint,1) - tmp_1(ipoint,2) = (loc_1 - loc_3) * Jbarkappa(ipoint,2) + loc_2 * Jkappa(ipoint,2) - tmp_1(ipoint,3) = (loc_1 - loc_3) * Jbarkappa(ipoint,3) + loc_2 * Jkappa(ipoint,3) - - tmp_1(ipoint,4) = Obarkappa(ipoint) - tmp_1(ipoint,5) = loc_3 - loc_1 - enddo - - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, loc_1, loc_2, tmp_omp_d2) & - !$OMP SHARED (n_points_final_grid, elec_beta_num, elec_alpha_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, tmp_1) - - allocate(tmp_omp_d2(n_points_final_grid,3)) - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - loc_2 = mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,1,j,i) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,2,j,i) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) + loc_2 * int2_grad1_u12_bimo_t(ipoint,3,j,i) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - tmp_omp_d2 = 0.d0 - !$OMP DO COLLAPSE(2) - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) - - tmp_omp_d2(ipoint,1) += loc_1 * int2_grad1_u12_bimo_t(ipoint,1,i,j) - tmp_omp_d2(ipoint,2) += loc_1 * int2_grad1_u12_bimo_t(ipoint,2,i,j) - tmp_omp_d2(ipoint,3) += loc_1 * int2_grad1_u12_bimo_t(ipoint,3,i,j) - enddo - enddo - enddo - !$OMP END DO NOWAIT - !$OMP CRITICAL - do ipoint = 1, n_points_final_grid - tmp_1(ipoint,1) += tmp_omp_d2(ipoint,1) - tmp_1(ipoint,2) += tmp_omp_d2(ipoint,2) - tmp_1(ipoint,3) += tmp_omp_d2(ipoint,3) - enddo - !$OMP END CRITICAL - - deallocate(tmp_omp_d2) - !$OMP END PARALLEL - - ! --- - - allocate(tmp_2(n_points_final_grid,5,mo_num,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,1,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,b,a) - tmp_2(ipoint,2,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,b,a) - tmp_2(ipoint,3,b,a) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,b,a) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, a, b, i) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP tmp_2) - !$OMP DO COLLAPSE(2) - do a = 1, mo_num - do b = 1, mo_num - - tmp_2(:,4,b,a) = 0.d0 - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,4,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - - tmp_2(:,5,b,a) = 0.d0 - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - tmp_2(ipoint,5,b,a) += final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,a) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,a) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,a) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemv( 'T', 5*n_points_final_grid, mo_num*mo_num, 1.d0 & - , tmp_2(1,1,1,1), size(tmp_2, 1) * size(tmp_2, 2) & - , tmp_1(1,1), 1 & - , 1.d0, fock_3e_uhf_mo_a(1,1), 1) - - deallocate(tmp_1, tmp_2) - - ! --- - - allocate(tmp_3(n_points_final_grid,8,mo_num), tmp_4(n_points_final_grid,8,mo_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, loc_1, loc_2) & - !$OMP SHARED (n_points_final_grid, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP final_weight_at_r_vector, Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - tmp_3(:,:,b) = 0.d0 - tmp_4(:,:,b) = 0.d0 - do ipoint = 1, n_points_final_grid - - tmp_3(ipoint,1,b) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,b) - - loc_1 = -2.0d0 * mos_r_in_r_array_transp(ipoint,b) - - tmp_4(ipoint,1,b) = loc_1 * ( Jbarkappa(ipoint,1) * (Jkappa(ipoint,1) + 0.25d0 * Jbarkappa(ipoint,1)) & - + Jbarkappa(ipoint,2) * (Jkappa(ipoint,2) + 0.25d0 * Jbarkappa(ipoint,2)) & - + Jbarkappa(ipoint,3) * (Jkappa(ipoint,3) + 0.25d0 * Jbarkappa(ipoint,3)) ) - - tmp_4(ipoint,8,b) = mos_r_in_r_array_transp(ipoint,b) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, loc_1, loc_2, loc_3, loc_4) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP Jkappa, Jbarkappa, tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_2 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,2,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,3,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,4,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - tmp_3(ipoint,8,b) += loc_1 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - tmp_4(ipoint,1,b) += loc_2 * ( Jbarkappa(ipoint,1) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + Jbarkappa(ipoint,2) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + Jbarkappa(ipoint,3) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = 2.d0 * loc_1 - loc_2 = mos_r_in_r_array_transp(ipoint,i) - loc_4 = 2.d0 * loc_2 - - tmp_3(ipoint,5,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,1,b,i) - tmp_3(ipoint,6,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,2,b,i) - tmp_3(ipoint,7,b) -= loc_1 * int2_grad1_u12_bimo_t(ipoint,3,b,i) - - tmp_3(ipoint,8,b) += loc_3 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,b,i) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,b,i) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,b,i) ) - - tmp_4(ipoint,1,b) += loc_4 * ( (Jkappa(ipoint,1) + 0.5d0 * Jbarkappa(ipoint,1)) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + (Jkappa(ipoint,2) + 0.5d0 * Jbarkappa(ipoint,2)) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + (Jkappa(ipoint,3) + 0.5d0 * Jbarkappa(ipoint,3)) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - - tmp_4(ipoint,2,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,3,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,4,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - - tmp_4(ipoint,5,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,1,i,b) - tmp_4(ipoint,6,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,2,i,b) - tmp_4(ipoint,7,b) += loc_2 * int2_grad1_u12_bimo_t(ipoint,3,i,b) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, b, i, j, loc_1, loc_2, loc_3) & - !$OMP SHARED (n_points_final_grid, mo_num, elec_beta_num, elec_alpha_num, & - !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP tmp_3, tmp_4) - !$OMP DO - do b = 1, mo_num - - do i = 1, elec_beta_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) - loc_3 = mos_r_in_r_array_transp(ipoint,j) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,b,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,b,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,b) & - + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,b) & - + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,b) ) - enddo - enddo - enddo - - do i = elec_beta_num+1, elec_alpha_num - do j = elec_beta_num+1, elec_alpha_num - do ipoint = 1, n_points_final_grid - - loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) - loc_2 = 0.5d0 * mos_r_in_r_array_transp(ipoint,b) - loc_3 = mos_r_in_r_array_transp(ipoint,i) - - tmp_3(ipoint,8,b) -= loc_1 * ( int2_grad1_u12_bimo_t(ipoint,1,b,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & - + int2_grad1_u12_bimo_t(ipoint,2,b,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & - + int2_grad1_u12_bimo_t(ipoint,3,b,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) - - tmp_4(ipoint,1,b) += loc_2 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) - - tmp_4(ipoint,1,b) -= loc_3 * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,b) & - + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,b) & - + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,b) ) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - ! --- - - call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & - , tmp_3(1,1,1), 8*n_points_final_grid & - , tmp_4(1,1,1), 8*n_points_final_grid & - , 1.d0, fock_3e_uhf_mo_a(1,1), mo_num) - - deallocate(tmp_3, tmp_4) - deallocate(Jkappa, Okappa) - - call wall_time(tf) - print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a and fock_3e_uhf_mo_b =', tf - ti END_PROVIDER diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f new file mode 100644 index 00000000..3bf6bd85 --- /dev/null +++ b/src/tc_scf/fock_3e_bi_ortho_uhf_old.irp.f @@ -0,0 +1,490 @@ + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs_old, (mo_num, mo_num)] + + implicit none + integer :: a, b, i, j + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + !print *, ' PROVIDING fock_3e_uhf_mo_cs_old ...' + !call wall_time(ti) + + fock_3e_uhf_mo_cs_old = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + do j = 1, elec_beta_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - 2.d0 * I_bij_aji & + - 2.d0 * I_bij_iaj & + - 2.d0 * I_bij_jia ) + + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_cs_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_cs_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a_old, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! ALPHA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs + + !print *, ' Providing fock_3e_uhf_mo_a_old ...' + !call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_a_old = fock_3e_uhf_mo_cs_old + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + ! --- + + do j = o, elec_alpha_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - 2.d0 * I_bij_jia ) + + enddo + enddo + + ! --- + + do j = 1, elec_beta_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - 2.d0 * I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + ! --- + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) + + enddo + enddo + + ! --- + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b_old, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! BETA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' PROVIDING fock_3e_uhf_mo_b_old ...' + !call wall_time(ti) + + o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) + + PROVIDE fock_3e_uhf_mo_cs_old + fock_3e_uhf_mo_b_old = fock_3e_uhf_mo_cs_old + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b_old) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO + do a = 1, mo_num + do b = 1, mo_num + + ! --- + + do j = o, elec_alpha_num + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_iaj ) + + enddo + enddo + + ! --- + + do j = 1, elec_beta_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_jia ) + + enddo + enddo + + ! --- + + do j = o, elec_alpha_num + do i = o, elec_alpha_num + + call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij) + call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija) + call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai) + call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji) + call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) + call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) + + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + - I_bij_aji ) + + enddo + enddo + + ! --- + + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_b_old(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_mo_b_old =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Equations (B6) and (B7) + ! + ! g <--> gamma + ! d <--> delta + ! e <--> eta + ! k <--> kappa + ! + END_DOC + + implicit none + integer :: g, d, e, k, mu, nu + double precision :: dm_ge_a, dm_ge_b, dm_ge + double precision :: dm_dk_a, dm_dk_b, dm_dk + double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu + double precision :: ti, tf + double precision, allocatable :: f_tmp(:,:) + + !print *, ' PROVIDING fock_3e_uhf_ao_a ...' + !call wall_time(ti) + + fock_3e_uhf_ao_a = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) + + allocate(f_tmp(ao_num,ao_num)) + f_tmp = 0.d0 + + !$OMP DO + do g = 1, ao_num + do e = 1, ao_num + dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) + dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) + dm_ge = dm_ge_a + dm_ge_b + do d = 1, ao_num + do k = 1, ao_num + dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) + dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) + dm_dk = dm_dk_a + dm_dk_b + do mu = 1, ao_num + do nu = 1, ao_num + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) + f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & + + dm_ge_a * dm_dk_a * i_mugd_eknu & + + dm_ge_a * dm_dk_a * i_mugd_knue & + - dm_ge_a * dm_dk * i_mugd_enuk & + - dm_ge * dm_dk_a * i_mugd_kenu & + - dm_ge_a * dm_dk_a * i_mugd_nuke & + - dm_ge_b * dm_dk_b * i_mugd_nuke ) + enddo + enddo + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do mu = 1, ao_num + do nu = 1, ao_num + fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) + enddo + enddo + !$OMP END CRITICAL + + deallocate(f_tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Equations (B6) and (B7) + ! + ! g <--> gamma + ! d <--> delta + ! e <--> eta + ! k <--> kappa + ! + END_DOC + + implicit none + integer :: g, d, e, k, mu, nu + double precision :: dm_ge_a, dm_ge_b, dm_ge + double precision :: dm_dk_a, dm_dk_b, dm_dk + double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu + double precision :: ti, tf + double precision, allocatable :: f_tmp(:,:) + + !print *, ' PROVIDING fock_3e_uhf_ao_b ...' + !call wall_time(ti) + + fock_3e_uhf_ao_b = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) + + allocate(f_tmp(ao_num,ao_num)) + f_tmp = 0.d0 + + !$OMP DO + do g = 1, ao_num + do e = 1, ao_num + dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) + dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e) + dm_ge = dm_ge_a + dm_ge_b + do d = 1, ao_num + do k = 1, ao_num + dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k) + dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k) + dm_dk = dm_dk_a + dm_dk_b + do mu = 1, ao_num + do nu = 1, ao_num + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue) + call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke) + call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk) + call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu) + f_tmp(mu,nu) -= 0.5d0 * ( dm_ge * dm_dk * i_mugd_nuek & + + dm_ge_b * dm_dk_b * i_mugd_eknu & + + dm_ge_b * dm_dk_b * i_mugd_knue & + - dm_ge_b * dm_dk * i_mugd_enuk & + - dm_ge * dm_dk_b * i_mugd_kenu & + - dm_ge_b * dm_dk_b * i_mugd_nuke & + - dm_ge_a * dm_dk_a * i_mugd_nuke ) + enddo + enddo + enddo + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do mu = 1, ao_num + do nu = 1, ao_num + fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) + enddo + enddo + !$OMP END CRITICAL + + deallocate(f_tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti + +END_PROVIDER + +! --- + diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index b925c9df..f09da461 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -58,6 +58,7 @@ program test_ints call test_fock_3e_uhf_mo_cs() call test_fock_3e_uhf_mo_a() + call test_fock_3e_uhf_mo_b() end From 90ff4af1a093c3190dc9de033a9a2f56b31935ac Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 2 Sep 2023 17:17:55 +0200 Subject: [PATCH 272/337] fock(3e) DGEMM --- src/tc_scf/fock_tc.irp.f | 1 + src/tc_scf/test_int.irp.f | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index fcca29ac..282f9873 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -227,6 +227,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) + if(three_body_h_tc) then PROVIDE fock_3e_uhf_mo_b Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index f09da461..4aa67d04 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -1120,7 +1120,7 @@ subroutine test_fock_3e_uhf_mo_cs() PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_cs_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1135,7 +1135,7 @@ subroutine test_fock_3e_uhf_mo_cs() print *, ' problem in fock_3e_uhf_mo_cs on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) @@ -1159,7 +1159,7 @@ subroutine test_fock_3e_uhf_mo_a() PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_a_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1174,7 +1174,7 @@ subroutine test_fock_3e_uhf_mo_a() print *, ' problem in fock_3e_uhf_mo_a on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) @@ -1198,7 +1198,7 @@ subroutine test_fock_3e_uhf_mo_b() PROVIDE fock_3e_uhf_mo_b fock_3e_uhf_mo_b_old - thr_ih = 1d-10 + thr_ih = 1d-8 norm = 0.d0 diff_tot = 0.d0 @@ -1213,7 +1213,7 @@ subroutine test_fock_3e_uhf_mo_b() print *, ' problem in fock_3e_uhf_mo_b on ', j, i print *, ' old value = ', I_old print *, ' new value = ', I_new - stop + !stop endif norm += dabs(I_old) From 63f6404f8e3dfcb67a3827ad712713ab2f11735a Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 2 Sep 2023 18:11:05 +0200 Subject: [PATCH 273/337] added OPENMP on 1/r12 --- src/non_h_ints_mu/total_tc_int.irp.f | 37 ++++++++++++++++---- src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 50 ++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 158ee2fb..58103643 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -159,25 +159,48 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num ! END_DOC - integer :: i, j, k, l - double precision :: integral - double precision, external :: get_ao_two_e_integral + integer :: i, j, k, l + double precision :: integral + double precision, allocatable :: tmp(:) + double precision, external :: get_ao_two_e_integral PROVIDE ao_integrals_map + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_coul, ao_integrals_map) & + !$OMP PRIVATE(i, j, k, l) + !$OMP DO do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num do k = 1, ao_num - ! < 1:k, 2:l | 1:i, 2:j > - integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) - - ao_two_e_coul(k,i,l,j) = integral + ao_two_e_coul(k,i,l,j) = get_ao_two_e_integral(i, j, k, l, ao_integrals_map) enddo enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL + + +! TODO +! allocate(tmp(ao_num)) +! +! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,l,j,k,tmp) +! do j = 1, ao_num +! do l = 1, ao_num +! do i = 1, ao_num +! call get_ao_two_e_integrals(i, l, l, ao_num, tmp(1)) +! do k = 1, ao_num +! ao_two_e_coul(k,i,l,j) = tmp(k) +! enddo +! enddo +! enddo +! enddo +! !$OMP END PARALLEL DO +! +! deallocate(tmp) END_PROVIDER diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index baa73c8f..63a1e162 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -1,12 +1,11 @@ ! --- - BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] -&BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] BEGIN_DOC ! - ! Fock matrix from three-electron terms + ! Fock matrix alpha from three-electron terms ! ! WARNING :: non hermitian if bi-ortho MOS used ! @@ -17,25 +16,60 @@ PROVIDE mo_l_coef mo_r_coef - !print *, ' Providing fock_3e_uhf_mo_a and fock_3e_uhf_mo_b ...' + !print *, ' Providing fock_3e_uhf_mo_a ...' !call wall_time(ti) ! CLOSED-SHELL PART PROVIDE fock_3e_uhf_mo_cs fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs - fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs if(elec_alpha_num .ne. elec_beta_num) then ! OPEN-SHELL PART - PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_b_os + PROVIDE fock_3e_uhf_mo_a_os fock_3e_uhf_mo_a += fock_3e_uhf_mo_a_os - fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os endif !call wall_time(tf) - !print *, ' Wall time for fock_3e_uhf_mo_a and fock_3e_uhf_mo_b =', tf - ti + !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! Fock matrix beta from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + END_DOC + + implicit none + double precision :: ti, tf + + PROVIDE mo_l_coef mo_r_coef + + !print *, ' Providing and fock_3e_uhf_mo_b ...' + !call wall_time(ti) + + ! CLOSED-SHELL PART + PROVIDE fock_3e_uhf_mo_cs + fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + + if(elec_alpha_num .ne. elec_beta_num) then + + ! OPEN-SHELL PART + PROVIDE fock_3e_uhf_mo_b_os + + fock_3e_uhf_mo_b += fock_3e_uhf_mo_b_os + endif + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_b =', tf - ti END_PROVIDER From 38f8b96d414c89664ce89c3d89f2191094f0eedb Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 4 Sep 2023 22:27:18 +0200 Subject: [PATCH 274/337] numerical integrals: 1 shot --> blocks over r2 --- src/non_h_ints_mu/grad_squared.irp.f | 18 - src/non_h_ints_mu/grad_squared_manu.irp.f | 66 --- src/non_h_ints_mu/jast_deriv.irp.f | 533 ++++++++++++++++++++-- src/non_h_ints_mu/new_grad_tc.irp.f | 16 - src/non_h_ints_mu/tc_integ.irp.f | 314 ++++++++++--- src/non_h_ints_mu/test_non_h_ints.irp.f | 100 +++- src/non_h_ints_mu/total_tc_int.irp.f | 28 +- src/tc_scf/tc_scf.irp.f | 10 +- 8 files changed, 845 insertions(+), 240 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 44a6ae65..8c6d35dc 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -425,7 +425,6 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao ! an additional term is added here directly instead of ! being added in int2_grad1_u12_square_ao for performance - ! note that the factor PROVIDE int2_u2_j1b2 @@ -465,25 +464,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao ! --- deallocate(b_mat) - call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & - !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) - !!$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - !!$OMP END DO - !!$OMP END PARALLEL endif if(write_tc_integ.and.mpi_master) then diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index 66f3c693..dcfeff47 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -67,72 +67,6 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu deallocate(tmp, b_mat) call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) - !do i = 1, ao_num - ! do j = 1, ao_num - ! do k = i, ao_num - - ! do l = max(j,k), ao_num - ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! end do - - ! !if (j.eq.k) then - ! ! do l = j+1, ao_num - ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! ! end do - ! !else - ! ! do l = j, ao_num - ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) - ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) - ! ! enddo - ! !endif - - ! enddo - ! enddo - !enddo - !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) - ! !$OMP DO SCHEDULE (static) - ! integer :: ii - ! ii = 0 - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! if((i.lt.j) .and. (k.lt.l)) cycle - ! ii = ii + 1 - ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_square_ao_test(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! print *, ' ii =', ii - ! !$OMP END DO - ! !$OMP END PARALLEL - - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) - ! !$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, j-1 - ! do k = 1, l-1 - ! ii = ii + 1 - ! tc_grad_square_ao_test(k,i,l,j) = tc_grad_square_ao_test(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! print *, ' ii =', ii - ! print *, ao_num * ao_num * ao_num * ao_num - ! !$OMP END DO - ! !$OMP END PARALLEL endif diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 859f2aa5..082412f9 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -32,7 +32,8 @@ grad1_u12_num = 0.d0 grad1_u12_squared_num = 0.d0 - if(j1b_type .eq. 100) then + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -111,42 +112,6 @@ !$OMP END DO !$OMP END PARALLEL - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, jpoint, r1, r2, grad1_u2b, dx, dy, dz) & - !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & - !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid ! r1 - - r1(1) = final_grid_points(1,ipoint) - r1(2) = final_grid_points(2,ipoint) - r1(3) = final_grid_points(3,ipoint) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = grad1_u2b(1) - dy = grad1_u2b(2) - dz = grad1_u2b(3) - - grad1_u12_num(jpoint,ipoint,1) = dx - grad1_u12_num(jpoint,ipoint,2) = dy - grad1_u12_num(jpoint,ipoint,3) = dz - - grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -158,6 +123,239 @@ END_PROVIDER ! --- +subroutine get_grad1_u12_r1_seq(r1, n_grid2, resx, resy, resz) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2) + + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + resx(:) = (gradx1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(1)) * v1b_r2(:) + resy(:) = (grady1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(2)) * v1b_r2(:) + resz(:) = (gradz1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(3)) * v1b_r2(:) + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_r1_seq + +! --- + +subroutine get_grad1_u12_squared_r1_seq(r1, n_grid2, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + + integer :: jpoint + double precision :: r2(3) + double precision :: v1b_r1, v1b_r2, u2b_r12 + double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz + double precision, external :: j12_mu, j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + res(jpoint) = dx*dx + dy*dy + dz*dz + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + v1b_r2 = j1b_nucl(r2) + u2b_r12 = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + + res(jpoint) = dx*dx + dy*dy + dz*dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_squared_r1_seq + +! --- + +subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + do jpoint = 1, n_points_extra_final_grid + res(jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + do jpoint = 1, n_points_extra_final_grid + resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) + resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) + resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) + res (jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_withsq_r1_seq + +! --- + double precision function j12_mu(r1, r2) include 'constants.include.F' @@ -190,18 +388,20 @@ end function j12_mu subroutine grad1_j12_mu(r1, r2, grad) BEGIN_DOC -! gradient of j(mu(r1,r2),r12) form of jastrow. -! -! if mu(r1,r2) = cst ---> j1b_type < 200 and -! -! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) -! -! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and -! -! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) -! -! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! END_DOC + include 'constants.include.F' implicit none @@ -851,3 +1051,240 @@ subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) end +! --- + +subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + gradx(jpoint) = tmp * dx + grady(jpoint) = tmp * dy + gradz(jpoint) = tmp * dz + enddo + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + gradx(jpoint) = tmp * mu_der(1) + grady(jpoint) = tmp * mu_der(2) + gradz(jpoint) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + + gradx(jpoint) = gradx(jpoint) + tmp * dx + grady(jpoint) = grady(jpoint) + tmp * dy + gradz(jpoint) = gradz(jpoint) + tmp * dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu_r1_seq + +! --- + +subroutine j12_mu_r1_seq(r1, n_grid2, res) + + include 'constants.include.F' + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: mu_tmp, r12 + + PROVIDE final_grid_points_extra + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' + stop + + endif + + return +end subroutine j12_mu_r1_seq + +! --- + +subroutine j1b_nucl_r1_seq(n_grid2, res) + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(out) :: res(n_grid2) + + double precision :: r(3) + integer :: i, jpoint + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + + res(jpoint) -= dexp(-a*dsqrt(d)) + enddo + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + + res(jpoint) *= e + enddo + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) + enddo + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + res(jpoint) -= dexp(-a*d*d) + enddo + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' + stop + + endif + + return +end subroutine j1b_nucl_r1_seq + +! --- + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index dc76431d..ab3cc3be 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -149,22 +149,6 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, deallocate(b_mat) call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num) - ! !$OMP PARALLEL & - ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & - ! !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num) - ! !$OMP DO SCHEDULE (static) - ! do j = 1, ao_num - ! do l = 1, ao_num - ! do i = 1, ao_num - ! do k = 1, ao_num - ! tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) - ! enddo - ! enddo - ! enddo - ! enddo - ! !$OMP END DO - ! !$OMP END PARALLEL endif diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index d569b25c..dd21a67f 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -1,10 +1,11 @@ -! --- - BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] BEGIN_DOC ! + ! TODO + ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? + ! ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) @@ -106,7 +107,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_num double precision, allocatable :: tmp(:,:,:) allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) @@ -126,39 +126,71 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f !$OMP END DO !$OMP END PARALLEL - int2_grad1_u12_ao = 0.d0 - do m = 1, 3 - !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & - ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) - enddo + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision, allocatable :: tmp_grad1_u12(:,:,:) + + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 8 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_ao) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & + , tmp_grad1_u12(1,i_blocks,2) & + , tmp_grad1_u12(1,i_blocks,3) ) + enddo - !! these dgemm are equivalent to - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & - !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & - !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! do jpoint = 1, n_points_extra_final_grid - ! w = -tmp(jpoint,i,j) - ! !w = tmp(jpoint,i,j) this work also because of the symmetry in K(1,2) - ! ! and sign compensation in L(1,2,3) - ! int2_grad1_u12_ao(i,j,ipoint,1) += w * grad1_u12_num(jpoint,ipoint,1) - ! int2_grad1_u12_ao(i,j,ipoint,2) += w * grad1_u12_num(jpoint,ipoint,2) - ! int2_grad1_u12_ao(i,j,ipoint,3) += w * grad1_u12_num(jpoint,ipoint,3) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & + , tmp_grad1_u12(1,i_rest,2) & + , tmp_grad1_u12(1,i_rest,3) ) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) + enddo + + deallocate(tmp_grad1_u12) + endif deallocate(tmp) else @@ -185,6 +217,72 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot, (ao_num, ao_num, n_points_final_grid, 3)] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .ge. 100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) + enddo + + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC @@ -275,16 +373,14 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p elseif(j1b_type .ge. 100) then PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_squared_num double precision, allocatable :: tmp(:,:,:) allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (j, i, jpoint) & !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) + !$OMP DO COLLAPSE(2) do j = 1, ao_num do i = 1, ao_num do jpoint = 1, n_points_extra_final_grid @@ -295,31 +391,63 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p !$OMP END DO !$OMP END PARALLEL - int2_grad1_u12_square_ao = 0.d0 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num) + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision, allocatable :: tmp_grad1_u12_squared(:,:) - !! this dgemm is equivalen to - !!$OMP PARALLEL & - !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & - !!$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, & - !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) - !!$OMP DO SCHEDULE (static) - !do ipoint = 1, n_points_final_grid - ! do j = 1, ao_num - ! do i = 1, ao_num - ! do jpoint = 1, n_points_extra_final_grid - ! w = -0.5d0 * tmp(jpoint,i,j) - ! int2_grad1_u12_square_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) - ! enddo - ! enddo - ! enddo - !enddo - !!$OMP END DO - !!$OMP END PARALLEL + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 16 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, tmp_grad1_u12_squared) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_square_ao) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_blocks)) + enddo + + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12_squared) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_rest)) + enddo + + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) + + deallocate(tmp_grad1_u12_squared) + endif deallocate(tmp) @@ -338,3 +466,65 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: i, j, jpoint + double precision :: time0, time1 + + print*, ' providing int2_grad1_u12_square_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .ge. 100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_squared_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO COLLAPSE(2) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) + + FREE grad1_u12_squared_num + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index e7718a40..7d84e73b 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -11,12 +11,24 @@ program test_non_h my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + + !call routine_grad_squared() !call routine_fit() !call test_ipp() - call test_v_ij_u_cst_mu_j1b_an() + !call test_v_ij_u_cst_mu_j1b_an() + + call test_int2_grad1_u12_square_ao() + call test_int2_grad1_u12_ao() end ! --- @@ -583,7 +595,91 @@ subroutine test_v_ij_u_cst_mu_j1b_an() print*, ' accuracy(%) = ', 100.d0 * accu / norm return -end subroutine test_v_ij_u_cst_mu_j1b_an() +end subroutine test_v_ij_u_cst_mu_j1b_an +! --- +subroutine test_int2_grad1_u12_square_ao() + + implicit none + integer :: i, j, ipoint + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE int2_grad1_u12_square_ao + PROVIDE int2_grad1_u12_square_ao_num_1shot + + thr = 1d-8 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + I_old = int2_grad1_u12_square_ao_num_1shot(j,i,ipoint) + I_new = int2_grad1_u12_square_ao (j,i,ipoint) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_int2_grad1_u12_square_ao + +! --- + +subroutine test_int2_grad1_u12_ao() + + implicit none + integer :: i, j, ipoint, m + double precision :: I_old, I_new + double precision :: norm, accu, thr, diff + + PROVIDE int2_grad1_u12_ao + PROVIDE int2_grad1_u12_ao_num_1shot + + thr = 1d-8 + norm = 0.d0 + accu = 0.d0 + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + + do m = 1, 3 + I_old = int2_grad1_u12_ao_num_1shot(j,i,ipoint,m) + I_new = int2_grad1_u12_ao (j,i,ipoint,m) + + diff = dabs(I_new-I_old) + if(diff .gt. thr) then + print *, ' problem on:', j, i, ipoint, m + print *, ' old value :', I_old + print *, ' new value :', I_new + stop + endif + + accu += diff + norm += dabs(I_old) + enddo + enddo + enddo + enddo + + print*, ' accuracy(%) = ', 100.d0 * accu / norm + + return +end subroutine test_int2_grad1_u12_ao + +! --- diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 58103643..254defe1 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -1,7 +1,4 @@ -! TODO -! remove ao_two_e_coul and use map directly - ! --- BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, ao_num)] @@ -159,10 +156,8 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num ! END_DOC - integer :: i, j, k, l - double precision :: integral - double precision, allocatable :: tmp(:) - double precision, external :: get_ao_two_e_integral + integer :: i, j, k, l + double precision, external :: get_ao_two_e_integral PROVIDE ao_integrals_map @@ -183,25 +178,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num !$OMP END DO !$OMP END PARALLEL - -! TODO -! allocate(tmp(ao_num)) -! -! !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,l,j,k,tmp) -! do j = 1, ao_num -! do l = 1, ao_num -! do i = 1, ao_num -! call get_ao_two_e_integrals(i, l, l, ao_num, tmp(1)) -! do k = 1, ao_num -! ao_two_e_coul(k,i,l,j) = tmp(k) -! enddo -! enddo -! enddo -! enddo -! !$OMP END PARALLEL DO -! -! deallocate(tmp) - END_PROVIDER ! --- diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index e4c38741..9bcbb13b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -13,11 +13,9 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid PROVIDE mu_erf @@ -26,6 +24,14 @@ program tc_scf print *, ' j1b_type = ', j1b_type print *, j1b_pen + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + !call create_guess() !call orthonormalize_mos() From 1f56b5d0f4eb8f6f68ef83ef6744a26807514807 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 5 Sep 2023 11:52:08 +0200 Subject: [PATCH 275/337] num integ grad & grad squared --- src/becke_numerical_grid/EZFIO.cfg | 4 + src/becke_numerical_grid/extra_grid.irp.f | 4 +- .../extra_grid_vector.irp.f | 34 +- src/becke_numerical_grid/grid_becke.irp.f | 2 +- src/non_h_ints_mu/debug_fit.irp.f | 77 +- src/non_h_ints_mu/jast_deriv.irp.f | 1165 ----------------- src/non_h_ints_mu/jast_deriv_utils.irp.f | 700 ++++++++++ src/non_h_ints_mu/jast_deriv_utils_vect.irp.f | 332 +++++ src/non_h_ints_mu/tc_integ.irp.f | 530 -------- src/non_h_ints_mu/tc_integ_an.irp.f | 244 ++++ src/non_h_ints_mu/tc_integ_num.irp.f | 190 +++ src/non_h_ints_mu/test_non_h_ints.irp.f | 6 +- src/tc_keywords/EZFIO.cfg | 11 + 13 files changed, 1584 insertions(+), 1715 deletions(-) create mode 100644 src/non_h_ints_mu/jast_deriv_utils.irp.f create mode 100644 src/non_h_ints_mu/jast_deriv_utils_vect.irp.f delete mode 100644 src/non_h_ints_mu/tc_integ.irp.f create mode 100644 src/non_h_ints_mu/tc_integ_an.irp.f create mode 100644 src/non_h_ints_mu/tc_integ_num.irp.f diff --git a/src/becke_numerical_grid/EZFIO.cfg b/src/becke_numerical_grid/EZFIO.cfg index 7861f074..e660fd6d 100644 --- a/src/becke_numerical_grid/EZFIO.cfg +++ b/src/becke_numerical_grid/EZFIO.cfg @@ -33,6 +33,10 @@ doc: Number of angular grid points given from input. Warning, this number cannot interface: ezfio,provider,ocaml default: 1202 +[n_points_extra_final_grid] +type: integer +doc: Total number of extra_grid points +interface: ezfio [extra_grid_type_sgn] type: integer diff --git a/src/becke_numerical_grid/extra_grid.irp.f b/src/becke_numerical_grid/extra_grid.irp.f index 9bd24f22..7df4dd6d 100644 --- a/src/becke_numerical_grid/extra_grid.irp.f +++ b/src/becke_numerical_grid/extra_grid.irp.f @@ -14,7 +14,7 @@ implicit none - if(.not.my_extra_grid_becke)then + if(.not. my_extra_grid_becke) then select case (extra_grid_type_sgn) case(0) n_points_extra_radial_grid = 23 @@ -33,7 +33,7 @@ stop end select else - n_points_extra_radial_grid = my_n_pt_r_extra_grid + n_points_extra_radial_grid = my_n_pt_r_extra_grid n_points_extra_integration_angular = my_n_pt_a_extra_grid endif diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index e4fc03b5..ae167282 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -23,29 +23,33 @@ BEGIN_PROVIDER [integer, n_points_extra_final_grid] enddo enddo - print*,'n_points_extra_final_grid = ',n_points_extra_final_grid - print*,'n max point = ',n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) -! call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) + print*, ' n_points_extra_final_grid = ', n_points_extra_final_grid + print*, ' n max point = ', n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) + call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) + END_PROVIDER ! --- BEGIN_PROVIDER [double precision, final_grid_points_extra, (3,n_points_extra_final_grid)] -&BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_extra_reverse, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] - implicit none +&BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_extra_reverse, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num)] + BEGIN_DOC -! final_grid_points_extra(1:3,j) = (/ x, y, z /) of the jth grid point -! -! final_weight_at_r_vector_extra(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions -! -! index_final_points_extra(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point -! -! index_final_points_extra_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices + ! final_grid_points_extra(1:3,j) = (/ x, y, z /) of the jth grid point + ! + ! final_weight_at_r_vector_extra(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions + ! + ! index_final_points_extra(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point + ! + ! index_final_points_extra_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices END_DOC + + implicit none integer :: i,j,k,l,i_count double precision :: r(3) + i_count = 0 do j = 1, nucl_num do i = 1, n_points_extra_radial_grid -1 @@ -67,3 +71,5 @@ END_PROVIDER enddo END_PROVIDER + + diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 21b9f98d..91fdc563 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -14,7 +14,7 @@ implicit none - if(.not.my_grid_becke)then + if(.not. my_grid_becke) then select case (grid_type_sgn) case(0) n_points_radial_grid = 23 diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index 05d2db68..d3152836 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -13,17 +13,27 @@ program debug_fit PROVIDE mu_erf j1b_pen + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + endif + !call test_j1b_nucl() !call test_grad_j1b_nucl() !call test_lapl_j1b_nucl() !call test_list_b2() - call test_list_b3() + !call test_list_b3() !call test_fit_u() !call test_fit_u2() !call test_fit_ugradu() + call test_grad1_u12_withsq_num() + end ! --- @@ -643,4 +653,69 @@ end subroutine test_fit_u2 ! --- +subroutine test_grad1_u12_withsq_num() + + implicit none + integer :: ipoint, jpoint, m + double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz + double precision, allocatable :: tmp_grad1_u12_squared(:,:), tmp_grad1_u12(:,:,:) + + print*, ' test_grad1_u12_withsq_num ...' + + PROVIDE grad1_u12_num grad1_u12_squared_num + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_points_final_grid)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_points_final_grid,3)) + + eps_ij = 1d-7 + acc_tot = 0.d0 + normalz = 0.d0 + + do ipoint = 1, n_points_final_grid + + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,ipoint,1) & + , tmp_grad1_u12(1,ipoint,2) & + , tmp_grad1_u12(1,ipoint,3) & + , tmp_grad1_u12_squared(1,ipoint)) + do jpoint = 1, n_points_extra_final_grid + + i_exc = grad1_u12_squared_num(jpoint,ipoint) + i_num = tmp_grad1_u12_squared(jpoint,ipoint) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad1_u12_squared_num on', ipoint, jpoint + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + stop + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + do m = 1, 3 + i_exc = grad1_u12_num(jpoint,ipoint,m) + i_num = tmp_grad1_u12(jpoint,ipoint,m) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad1_u12_num on', ipoint, jpoint, m + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + stop + endif + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + enddo + enddo + + !print*, ' acc_tot = ', acc_tot + !print*, ' normalz = ', normalz + print*, ' accuracy (%) = ', 100.d0 * acc_tot / normalz + + return +end subroutine test_grad1_u12_withsq_num + +! --- + diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 082412f9..ee01886c 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -123,1168 +123,3 @@ END_PROVIDER ! --- -subroutine get_grad1_u12_r1_seq(r1, n_grid2, resx, resy, resz) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2) - - double precision :: v1b_r1 - double precision :: grad1_v1b(3) - double precision, allocatable :: v1b_r2(:) - double precision, allocatable :: u2b_r12(:) - double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) - double precision, external :: j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - allocate(v1b_r2(n_grid2)) - allocate(u2b_r12(n_grid2)) - allocate(gradx1_u2b(n_grid2)) - allocate(grady1_u2b(n_grid2)) - allocate(gradz1_u2b(n_grid2)) - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - call j1b_nucl_r1_seq(n_grid2, v1b_r2) - call j12_mu_r1_seq(r1, n_grid2, u2b_r12) - call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) - - resx(:) = (gradx1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(1)) * v1b_r2(:) - resy(:) = (grady1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(2)) * v1b_r2(:) - resz(:) = (gradz1_u2b(:) * v1b_r1 + u2b_r12(:) * grad1_v1b(3)) * v1b_r2(:) - - deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_r1_seq - -! --- - -subroutine get_grad1_u12_squared_r1_seq(r1, n_grid2, res) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: res(n_grid2) - - - integer :: jpoint - double precision :: r2(3) - double precision :: v1b_r1, v1b_r2, u2b_r12 - double precision :: grad1_v1b(3), grad1_u2b(3) - double precision :: dx, dy, dz - double precision, external :: j12_mu, j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = grad1_u2b(1) - dy = grad1_u2b(2) - dz = grad1_u2b(3) - - res(jpoint) = dx*dx + dy*dy + dz*dz - enddo - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - v1b_r2 = j1b_nucl(r2) - u2b_r12 = j12_mu(r1, r2) - call grad1_j12_mu(r1, r2, grad1_u2b) - - dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 - dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 - dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 - - res(jpoint) = dx*dx + dy*dy + dz*dz - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_squared_r1_seq - -! --- - -subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) - - BEGIN_DOC - ! - ! grad_1 u(r1,r2) - ! - ! this will be integrated numerically over r2: - ! we use grid for r1 and extra_grid for r2 - ! - ! for 99 < j1b_type < 199 - ! - ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) - ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) - ! - END_DOC - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) - - integer :: jpoint - double precision :: v1b_r1 - double precision :: grad1_v1b(3) - double precision, allocatable :: v1b_r2(:) - double precision, allocatable :: u2b_r12(:) - double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) - double precision, external :: j1b_nucl - - PROVIDE j1b_type - PROVIDE final_grid_points_extra - - if( (j1b_type .eq. 100) .or. & - (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then - - call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) - do jpoint = 1, n_points_extra_final_grid - res(jpoint) = resx(jpoint) * resx(jpoint) & - + resy(jpoint) * resy(jpoint) & - + resz(jpoint) * resz(jpoint) - enddo - - elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then - - allocate(v1b_r2(n_grid2)) - allocate(u2b_r12(n_grid2)) - allocate(gradx1_u2b(n_grid2)) - allocate(grady1_u2b(n_grid2)) - allocate(gradz1_u2b(n_grid2)) - - v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nucl(r1, grad1_v1b) - - call j1b_nucl_r1_seq(n_grid2, v1b_r2) - call j12_mu_r1_seq(r1, n_grid2, u2b_r12) - call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) - - do jpoint = 1, n_points_extra_final_grid - resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) - resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) - resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) - res (jpoint) = resx(jpoint) * resx(jpoint) & - + resy(jpoint) * resy(jpoint) & - + resz(jpoint) * resz(jpoint) - enddo - - deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine get_grad1_u12_withsq_r1_seq - -! --- - -double precision function j12_mu(r1, r2) - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision :: mu_tmp, r12 - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_tmp = mu_erf * r12 - - j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' - stop - - endif - - return -end function j12_mu - -! --- - -subroutine grad1_j12_mu(r1, r2, grad) - - BEGIN_DOC - ! - ! gradient of j(mu(r1,r2),r12) form of jastrow. - ! - ! if mu(r1,r2) = cst ---> j1b_type < 200 and - ! - ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) - ! - ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and - ! - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) - ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - ! - END_DOC - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: dx, dy, dz, r12, tmp - - grad = 0.d0 - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - grad(1) = tmp * dx - grad(2) = tmp * dy - grad(3) = tmp * dz - - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - double precision :: mu_val, mu_tmp, mu_der(3) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - grad(1) = tmp * mu_der(1) - grad(2) = tmp * mu_der(2) - grad(3) = tmp * mu_der(3) - - if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 - grad(1) = grad(1) + tmp * dx - grad(2) = grad(2) + tmp * dy - grad(3) = grad(3) + tmp * dz - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine grad1_j12_mu - -! --- - -double precision function j1b_nucl(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) - enddo - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - j1b_nucl = j1b_nucl * e - enddo - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) - enddo - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - j1b_nucl = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - j1b_nucl = j1b_nucl - dexp(-a*d*d) - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' - stop - - endif - - return -end function j1b_nucl - -! --- - -double precision function j1b_nucl_square(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - j1b_nucl_square = j1b_nucl_square * e - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - j1b_nucl_square = 1.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) - enddo - j1b_nucl_square = j1b_nucl_square * j1b_nucl_square - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' - stop - - endif - - return -end function j1b_nucl_square - -! --- - -subroutine grad1_j1b_nucl(r, grad) - - implicit none - double precision, intent(in) :: r(3) - double precision, intent(out) :: grad(3) - integer :: ipoint, i, j, phase - double precision :: x, y, z, dx, dy, dz - double precision :: a, d, e - double precision :: fact_x, fact_y, fact_z - double precision :: ax_der, ay_der, az_der, a_expo - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = dsqrt(x*x + y*y + z*z) - e = a * dexp(-a*d) / d - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = fact_x - grad(2) = fact_y - grad(3) = fact_z - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - x = r(1) - y = r(2) - z = r(3) - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, List_all_comb_b2_size - - phase = 0 - a_expo = 0.d0 - ax_der = 0.d0 - ay_der = 0.d0 - az_der = 0.d0 - do j = 1, nucl_num - a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - dx = x - nucl_coord(j,1) - dy = y - nucl_coord(j,2) - dz = z - nucl_coord(j,3) - - phase += List_all_comb_b2(j,i) - a_expo += a * (dx*dx + dy*dy + dz*dz) - ax_der += a * dx - ay_der += a * dy - az_der += a * dz - enddo - e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) - - fact_x += e * ax_der - fact_y += e * ay_der - fact_z += e * az_der - enddo - - grad(1) = fact_x - grad(2) = fact_y - grad(3) = fact_z - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - e = a * j1b_pen_coef(i) * dexp(-a*d) - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = 2.d0 * fact_x - grad(2) = 2.d0 * fact_y - grad(3) = 2.d0 * fact_z - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - e = a * d * dexp(-a*d*d) - - fact_x += e * x - fact_y += e * y - fact_z += e * z - enddo - - grad(1) = 4.d0 * fact_x - grad(2) = 4.d0 * fact_y - grad(3) = 4.d0 * fact_z - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' - stop - - endif - - return -end subroutine grad1_j1b_nucl - -! --- - -subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: mu_val, mu_der(3) - double precision :: r(3) - double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) - double precision :: dm_tot, tmp1, tmp2, tmp3 - double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot - double precision :: f_rho1, f_rho2, d_drho_f_rho1 - double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume - - if(j1b_type .eq. 200) then - - ! - ! r = 0.5 (r1 + r2) - ! - ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) - ! - ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx - ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) - ! - - PROVIDE mu_r_ct mu_erf - - r(1) = 0.5d0 * (r1(1) + r2(1)) - r(2) = 0.5d0 * (r1(2) + r2(2)) - r(3) = 0.5d0 * (r1(3) + r2(3)) - - call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) - - dm_tot = dm_a(1) + dm_b(1) - tmp1 = dsqrt(dm_tot) - tmp2 = mu_erf * dexp(-dm_tot) - - mu_val = mu_r_ct * tmp1 + tmp2 - - mu_der = 0.d0 - if(dm_tot .lt. 1d-7) return - - tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 - mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) - mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) - mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - - elseif(j1b_type .eq. 201) then - - ! - ! r = 0.5 (r1 + r2) - ! - ! mu[rho(r)] = alpha rho + mu0 exp(-rho) - ! - ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx - ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) - ! - - PROVIDE mu_r_ct mu_erf - - r(1) = 0.5d0 * (r1(1) + r2(1)) - r(2) = 0.5d0 * (r1(2) + r2(2)) - r(3) = 0.5d0 * (r1(3) + r2(3)) - - call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) - - dm_tot = dm_a(1) + dm_b(1) - tmp2 = mu_erf * dexp(-dm_tot) - - mu_val = mu_r_ct * dm_tot + tmp2 - - tmp3 = 0.5d0 * (mu_r_ct - tmp2) - mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) - mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) - mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) - - elseif(j1b_type .eq. 202) then - - ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO - ! - ! RHO = rho(r1) + rho(r2) - ! - ! f[rho] = alpha rho^beta + mu0 exp(-rho) - ! - ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) - ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) - call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - nume = rho1 * f_rho1 + rho2 * f_rho2 - mu_val = nume * inv_rho_tot - mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) - elseif(j1b_type .eq. 203) then - - ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO - ! - ! RHO = rho(r1) + rho(r2) - ! - ! f[rho] = alpha rho^beta + mu0 - ! - ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) - ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf - call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - nume = rho1 * f_rho1 + rho2 * f_rho2 - mu_val = nume * inv_rho_tot - mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) - elseif(j1b_type .eq. 204) then - - ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} - ! - ! f[rho] = alpha rho^beta + mu0 - ! - ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) - ! - ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) - ! - ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) - - !!!!!!!!! rho1,rho2,rho1+rho2 - call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - rho_tot = rho1 + rho2 - if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 - inv_rho_tot = 1.d0/rho_tot - ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf - call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) - d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) - d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) - mu_val = 0.5d0 * ( f_rho1 + f_rho2) - mu_der(1:3) = d_dx_rho_f_rho(1:3) - else - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine mu_r_val_and_grad - -! --- - -subroutine grad1_j1b_nucl_square_num(r1, grad) - - implicit none - double precision, intent(in) :: r1(3) - double precision, intent(out) :: grad(3) - double precision :: r(3), eps, tmp_eps, vp, vm - double precision, external :: j1b_nucl_square - - eps = 1d-5 - tmp_eps = 0.5d0 / eps - - r(1:3) = r1(1:3) - - r(1) = r(1) + eps - vp = j1b_nucl_square(r) - r(1) = r(1) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(1) = r(1) + eps - grad(1) = tmp_eps * (vp - vm) - - r(2) = r(2) + eps - vp = j1b_nucl_square(r) - r(2) = r(2) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(2) = r(2) + eps - grad(2) = tmp_eps * (vp - vm) - - r(3) = r(3) + eps - vp = j1b_nucl_square(r) - r(3) = r(3) - 2.d0 * eps - vm = j1b_nucl_square(r) - r(3) = r(3) + eps - grad(3) = tmp_eps * (vp - vm) - - return -end subroutine grad1_j1b_nucl_square_num - -! --- - -subroutine grad1_j12_mu_square_num(r1, r2, grad) - - include 'constants.include.F' - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: r(3) - double precision :: eps, tmp_eps, vp, vm - double precision, external :: j12_mu_square - - eps = 1d-5 - tmp_eps = 0.5d0 / eps - - r(1:3) = r1(1:3) - - r(1) = r(1) + eps - vp = j12_mu_square(r, r2) - r(1) = r(1) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(1) = r(1) + eps - grad(1) = tmp_eps * (vp - vm) - - r(2) = r(2) + eps - vp = j12_mu_square(r, r2) - r(2) = r(2) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(2) = r(2) + eps - grad(2) = tmp_eps * (vp - vm) - - r(3) = r(3) + eps - vp = j12_mu_square(r, r2) - r(3) = r(3) - 2.d0 * eps - vm = j12_mu_square(r, r2) - r(3) = r(3) + eps - grad(3) = tmp_eps * (vp - vm) - - return -end subroutine grad1_j12_mu_square_num - -! --- - -double precision function j12_mu_square(r1, r2) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, external :: j12_mu - - j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) - - return -end function j12_mu_square - -! --- - -subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) - implicit none - BEGIN_DOC -! function giving mu as a function of rho -! -! f_mu = alpha * rho**beta + mu0 * exp(-rho) -! -! and its derivative with respect to rho d_drho_f_mu - END_DOC - double precision, intent(in) :: rho,alpha,mu0,beta - double precision, intent(out) :: f_mu,d_drho_f_mu - f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) - d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) - -end - - -subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) - implicit none - BEGIN_DOC -! returns the density in r1,r2 and grad_rho at r1 - END_DOC - double precision, intent(in) :: r1(3),r2(3) - double precision, intent(out):: grad_rho1(3),rho1,rho2 - double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) - call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) - rho1 = dm_a(1) + dm_b(1) - grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) - call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) - rho2 = dm_a(1) + dm_b(1) -end - -subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) - implicit none - BEGIN_DOC -! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) - END_DOC - double precision, intent(in) :: rho1,rho2,alpha,mu0,beta - double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 - double precision :: tmp - call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) - call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) -end - - -subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) - implicit none - BEGIN_DOC -! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) - END_DOC - double precision, intent(in) :: rho1,rho2,alpha,mu0,beta - double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 - double precision :: tmp - call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) - call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) -end - -subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) - implicit none - BEGIN_DOC -! function giving mu as a function of rho -! -! f_mu = alpha * rho**beta + mu0 -! -! and its derivative with respect to rho d_drho_f_mu - END_DOC - double precision, intent(in) :: rho,alpha,mu0,beta - double precision, intent(out) :: f_mu,d_drho_f_mu - f_mu = alpha * (rho)**beta + mu0 - d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - -end - -! --- - -subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) - - BEGIN_DOC - ! - ! gradient of j(mu(r1,r2),r12) form of jastrow. - ! - ! if mu(r1,r2) = cst ---> j1b_type < 200 and - ! - ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) - ! - ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and - ! - ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) - ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) - ! - END_DOC - - include 'constants.include.F' - - implicit none - integer , intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: gradx(n_grid2) - double precision, intent(out) :: grady(n_grid2) - double precision, intent(out) :: gradz(n_grid2) - - integer :: jpoint - double precision :: r2(3) - double precision :: dx, dy, dz, r12, tmp - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - gradx(jpoint) = tmp * dx - grady(jpoint) = tmp * dy - gradz(jpoint) = tmp * dz - enddo - - elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then - - double precision :: mu_val, mu_tmp, mu_der(3) - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - r12 = dsqrt(dx * dx + dy * dy + dz * dz) - - call mu_r_val_and_grad(r1, r2, mu_val, mu_der) - mu_tmp = mu_val * r12 - tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) - gradx(jpoint) = tmp * mu_der(1) - grady(jpoint) = tmp * mu_der(2) - gradz(jpoint) = tmp * mu_der(3) - - if(r12 .lt. 1d-10) return - tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 - - gradx(jpoint) = gradx(jpoint) + tmp * dx - grady(jpoint) = grady(jpoint) + tmp * dy - gradz(jpoint) = gradz(jpoint) + tmp * dz - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - return -end subroutine grad1_j12_mu_r1_seq - -! --- - -subroutine j12_mu_r1_seq(r1, n_grid2, res) - - include 'constants.include.F' - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(in) :: r1(3) - double precision, intent(out) :: res(n_grid2) - - integer :: jpoint - double precision :: r2(3) - double precision :: mu_tmp, r12 - - PROVIDE final_grid_points_extra - - if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then - - do jpoint = 1, n_points_extra_final_grid ! r2 - - r2(1) = final_grid_points_extra(1,jpoint) - r2(2) = final_grid_points_extra(2,jpoint) - r2(3) = final_grid_points_extra(3,jpoint) - - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_tmp = mu_erf * r12 - - res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' - stop - - endif - - return -end subroutine j12_mu_r1_seq - -! --- - -subroutine j1b_nucl_r1_seq(n_grid2, res) - - implicit none - integer, intent(in) :: n_grid2 - double precision, intent(out) :: res(n_grid2) - - double precision :: r(3) - integer :: i, jpoint - double precision :: a, d, e, x, y, z - - if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - - res(jpoint) -= dexp(-a*dsqrt(d)) - enddo - enddo - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - dexp(-a*d) - - res(jpoint) *= e - enddo - enddo - - elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) - enddo - enddo - - elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then - - res = 1.d0 - - do jpoint = 1, n_points_extra_final_grid ! r2 - r(1) = final_grid_points_extra(1,jpoint) - r(2) = final_grid_points_extra(2,jpoint) - r(3) = final_grid_points_extra(3,jpoint) - - do i = 1, nucl_num - a = j1b_pen(i) - x = r(1) - nucl_coord(i,1) - y = r(2) - nucl_coord(i,2) - z = r(3) - nucl_coord(i,3) - d = x*x + y*y + z*z - res(jpoint) -= dexp(-a*d*d) - enddo - enddo - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' - stop - - endif - - return -end subroutine j1b_nucl_r1_seq - -! --- - diff --git a/src/non_h_ints_mu/jast_deriv_utils.irp.f b/src/non_h_ints_mu/jast_deriv_utils.irp.f new file mode 100644 index 00000000..bcbe16af --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv_utils.irp.f @@ -0,0 +1,700 @@ + +! --- + +double precision function j12_mu(r1, r2) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision :: mu_tmp, r12 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' + stop + + endif + + return +end function j12_mu + +! --- + +subroutine grad1_j12_mu(r1, r2, grad) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: dx, dy, dz, r12, tmp + + grad = 0.d0 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + grad(1) = tmp * dx + grad(2) = tmp * dy + grad(3) = tmp * dz + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + grad(1) = tmp * mu_der(1) + grad(2) = tmp * mu_der(2) + grad(3) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + grad(1) = grad(1) + tmp * dx + grad(2) = grad(2) + tmp * dy + grad(3) = grad(3) + tmp * dz + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu + +! --- + +double precision function j1b_nucl(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl = j1b_nucl * e + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - j1b_pen_coef(i) * dexp(-a*d) + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl = j1b_nucl - dexp(-a*d*d) + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' + stop + + endif + + return +end function j1b_nucl + +! --- + +double precision function j1b_nucl_square(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square * e + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - j1b_pen_coef(i) * dexp(-a*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' + stop + + endif + + return +end function j1b_nucl_square + +! --- + +subroutine grad1_j1b_nucl(r, grad) + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: grad(3) + integer :: ipoint, i, j, phase + double precision :: x, y, z, dx, dy, dz + double precision :: a, d, e + double precision :: fact_x, fact_y, fact_z + double precision :: ax_der, ay_der, az_der, a_expo + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = dsqrt(x*x + y*y + z*z) + e = a * dexp(-a*d) / d + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + x = r(1) + y = r(2) + z = r(3) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * j1b_pen_coef(i) * dexp(-a*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 2.d0 * fact_x + grad(2) = 2.d0 * fact_y + grad(3) = 2.d0 * fact_z + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * d * dexp(-a*d*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 4.d0 * fact_x + grad(2) = 4.d0 * fact_y + grad(3) = 4.d0 * fact_z + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' + stop + + endif + + return +end subroutine grad1_j1b_nucl + +! --- + +subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: mu_val, mu_der(3) + double precision :: r(3) + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume + + if(j1b_type .eq. 200) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp1 = dsqrt(dm_tot) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * tmp1 + tmp2 + + mu_der = 0.d0 + if(dm_tot .lt. 1d-7) return + + tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 201) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha rho + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * dm_tot + tmp2 + + tmp3 = 0.5d0 * (mu_r_ct - tmp2) + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 202) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 204) then + + ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)]} + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)]) + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) ] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf + call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + mu_val = 0.5d0 * ( f_rho1 + f_rho2) + mu_der(1:3) = d_dx_rho_f_rho(1:3) + else + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine mu_r_val_and_grad + +! --- + +subroutine grad1_j1b_nucl_square_num(r1, grad) + + implicit none + double precision, intent(in) :: r1(3) + double precision, intent(out) :: grad(3) + double precision :: r(3), eps, tmp_eps, vp, vm + double precision, external :: j1b_nucl_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j1b_nucl_square_num + +! --- + +subroutine grad1_j12_mu_square_num(r1, r2, grad) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: r(3) + double precision :: eps, tmp_eps, vp, vm + double precision, external :: j12_mu_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j12_mu_square(r, r2) + r(1) = r(1) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j12_mu_square(r, r2) + r(2) = r(2) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j12_mu_square(r, r2) + r(3) = r(3) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j12_mu_square_num + +! --- + +double precision function j12_mu_square(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu + + j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) + + return +end function j12_mu_square + +! --- + +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + +! --- + diff --git a/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f new file mode 100644 index 00000000..f9512827 --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv_utils_vect.irp.f @@ -0,0 +1,332 @@ + +! --- + +subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res) + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2) + + integer :: jpoint + double precision :: v1b_r1 + double precision :: grad1_v1b(3) + double precision, allocatable :: v1b_r2(:) + double precision, allocatable :: u2b_r12(:) + double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:) + double precision, external :: j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + if( (j1b_type .eq. 100) .or. & + (j1b_type .ge. 200) .and. (j1b_type .lt. 300) ) then + + call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz) + do jpoint = 1, n_points_extra_final_grid + res(jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 200)) then + + allocate(v1b_r2(n_grid2)) + allocate(u2b_r12(n_grid2)) + allocate(gradx1_u2b(n_grid2)) + allocate(grady1_u2b(n_grid2)) + allocate(gradz1_u2b(n_grid2)) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + call j1b_nucl_r1_seq(n_grid2, v1b_r2) + call j12_mu_r1_seq(r1, n_grid2, u2b_r12) + call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b) + + do jpoint = 1, n_points_extra_final_grid + resx(jpoint) = (gradx1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(1)) * v1b_r2(jpoint) + resy(jpoint) = (grady1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(2)) * v1b_r2(jpoint) + resz(jpoint) = (gradz1_u2b(jpoint) * v1b_r1 + u2b_r12(jpoint) * grad1_v1b(3)) * v1b_r2(jpoint) + res (jpoint) = resx(jpoint) * resx(jpoint) & + + resy(jpoint) * resy(jpoint) & + + resz(jpoint) * resz(jpoint) + enddo + + deallocate(v1b_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine get_grad1_u12_withsq_r1_seq + +! --- + +subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz) + + BEGIN_DOC + ! + ! gradient of j(mu(r1,r2),r12) form of jastrow. + ! + ! if mu(r1,r2) = cst ---> j1b_type < 200 and + ! + ! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) + ! + ! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and + ! + ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) + ! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + ! + END_DOC + + include 'constants.include.F' + + implicit none + integer , intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: gradx(n_grid2) + double precision, intent(out) :: grady(n_grid2) + double precision, intent(out) :: gradz(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: dx, dy, dz, r12, tmp + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + cycle + endif + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + gradx(jpoint) = tmp * dx + grady(jpoint) = tmp * dy + gradz(jpoint) = tmp * dz + enddo + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + gradx(jpoint) = tmp * mu_der(1) + grady(jpoint) = tmp * mu_der(2) + gradz(jpoint) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) then + gradx(jpoint) = 0.d0 + grady(jpoint) = 0.d0 + gradz(jpoint) = 0.d0 + cycle + endif + + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + + gradx(jpoint) = gradx(jpoint) + tmp * dx + grady(jpoint) = grady(jpoint) + tmp * dy + gradz(jpoint) = gradz(jpoint) + tmp * dz + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu_r1_seq + +! --- + +subroutine j12_mu_r1_seq(r1, n_grid2, res) + + include 'constants.include.F' + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(in) :: r1(3) + double precision, intent(out) :: res(n_grid2) + + integer :: jpoint + double precision :: r2(3) + double precision :: mu_tmp, r12 + + PROVIDE final_grid_points_extra + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu_r1_seq' + stop + + endif + + return +end subroutine j12_mu_r1_seq + +! --- + +subroutine j1b_nucl_r1_seq(n_grid2, res) + + ! TODO + ! change loops order + + implicit none + integer, intent(in) :: n_grid2 + double precision, intent(out) :: res(n_grid2) + + double precision :: r(3) + integer :: i, jpoint + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + + res(jpoint) -= dexp(-a*dsqrt(d)) + enddo + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + + res(jpoint) *= e + enddo + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + res(jpoint) -= j1b_pen_coef(i) * dexp(-a*d) + enddo + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + res = 1.d0 + + do jpoint = 1, n_points_extra_final_grid ! r2 + r(1) = final_grid_points_extra(1,jpoint) + r(2) = final_grid_points_extra(2,jpoint) + r(3) = final_grid_points_extra(3,jpoint) + + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + res(jpoint) -= dexp(-a*d*d) + enddo + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_r1_seq' + stop + + endif + + return +end subroutine j1b_nucl_r1_seq + +! --- + diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f deleted file mode 100644 index dd21a67f..00000000 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ /dev/null @@ -1,530 +0,0 @@ - -BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! TODO - ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12 (j1b_type .eq. 1) - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) - ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! - ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(read_tc_integ) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") - read(11) int2_grad1_u12_ao - - else - - if(j1b_type .eq. 0) then - - PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu - - int2_grad1_u12_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1) & - !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points & - !$OMP , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1)) - int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2)) - int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3)) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - - PROVIDE v_1b_grad - PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b - - int2_grad1_u12_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & - !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & - !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b_an, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b_an(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b - - elseif(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - integer :: n_blocks, n_rest, n_pass - integer :: i_blocks, i_rest, i_pass, ii - double precision, allocatable :: tmp_grad1_u12(:,:,:) - - ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 8 - n_rest = int(mod(n_points_final_grid, n_blocks)) - n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_ao) - !$OMP DO - do i_pass = 1, n_pass - ii = (i_pass-1)*n_blocks + 1 - - do i_blocks = 1, n_blocks - ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & - , tmp_grad1_u12(1,i_blocks,2) & - , tmp_grad1_u12(1,i_blocks,3) ) - enddo - - do m = 1, 3 - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate(tmp_grad1_u12) - - ! TODO - ! OPENMP - if(n_rest .ne. 0) then - - allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) - - ii = n_pass*n_blocks + 1 - do i_rest = 1, n_rest - ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & - , tmp_grad1_u12(1,i_rest,2) & - , tmp_grad1_u12(1,i_rest,3) ) - enddo - - do m = 1, 3 - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao(1,1,ii,m), ao_num*ao_num) - enddo - - deallocate(tmp_grad1_u12) - endif - - deallocate(tmp) - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - endif - - if(write_tc_integ.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) - write(11) int2_grad1_u12_ao - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao_num_1shot ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_num - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO SCHEDULE (static) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - do m = 1, 3 - !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & - ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) - enddo - - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao_num_1shot =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 - ! - END_DOC - - implicit none - integer :: ipoint, i, j, m, jpoint - double precision :: time0, time1 - double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_square_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .eq. 0) then - - PROVIDE int2_grad1u2_grad2u2 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then - - if(use_ipp) then - - ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance - PROVIDE u12sq_j1bsq grad12_j12 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE u12sq_j1bsq grad12_j12 - - else - - PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - - int2_grad1_u12_square_ao = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, ipoint) & - !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 - - endif - - elseif(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO COLLAPSE(2) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - integer :: n_blocks, n_rest, n_pass - integer :: i_blocks, i_rest, i_pass, ii - double precision, allocatable :: tmp_grad1_u12_squared(:,:) - - ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 16 - n_rest = int(mod(n_points_final_grid, n_blocks)) - n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif - - allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, tmp_grad1_u12_squared) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_square_ao) - !$OMP DO - do i_pass = 1, n_pass - ii = (i_pass-1)*n_blocks + 1 - - do i_blocks = 1, n_blocks - ipoint = ii - 1 + i_blocks ! r1 - call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_blocks)) - enddo - - call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) - enddo - !$OMP END DO - !$OMP END PARALLEL - - deallocate(tmp_grad1_u12_squared) - - ! TODO - ! OPENMP - if(n_rest .ne. 0) then - - allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) - - ii = n_pass*n_blocks + 1 - do i_rest = 1, n_rest - ipoint = ii - 1 + i_rest ! r1 - call get_grad1_u12_squared_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12_squared(1,i_rest)) - enddo - - call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao(1,1,ii), ao_num*ao_num) - - deallocate(tmp_grad1_u12_squared) - endif - - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 - ! - END_DOC - - implicit none - integer :: i, j, jpoint - double precision :: time0, time1 - - print*, ' providing int2_grad1_u12_square_ao_num_1shot ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(j1b_type .ge. 100) then - - PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra - PROVIDE grad1_u12_squared_num - - double precision, allocatable :: tmp(:,:,:) - allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, i, jpoint) & - !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) - !$OMP DO COLLAPSE(2) - do j = 1, ao_num - do i = 1, ao_num - do jpoint = 1, n_points_extra_final_grid - tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & - , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) - - FREE grad1_u12_squared_num - deallocate(tmp) - - else - - print *, ' j1b_type = ', j1b_type, 'not implemented yet' - stop - - endif - - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_square_ao_num_1shot =', time1-time0 - call print_memory_usage() - -END_PROVIDER - -! --- - diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f new file mode 100644 index 00000000..e2f181a0 --- /dev/null +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -0,0 +1,244 @@ + +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] + + BEGIN_DOC + ! + ! TODO + ! combine with int2_grad1_u12_square_ao to avoid repeated calculation ? + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! where r1 = r(ipoint) + ! + ! if J(r1,r2) = u12 (j1b_type .eq. 1) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] + ! + ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(read_tc_integ) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao + + else + + if(j1b_type .eq. 0) then + + PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points & + !$OMP , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1)) + int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2)) + int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3)) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + PROVIDE v_1b_grad + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & + !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b_an, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b_an(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b_an x_v_ij_erf_rk_cst_mu_j1b + + elseif(j1b_type .ge. 100) then + + PROVIDE int2_grad1_u12_ao_num + int2_grad1_u12_ao = int2_grad1_u12_ao_num + FREE int2_grad1_u12_ao_num + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + endif + + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_square_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .eq. 0) then + + PROVIDE int2_grad1u2_grad2u2 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + if(use_ipp) then + + ! the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance + PROVIDE u12sq_j1bsq grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE u12sq_j1bsq grad12_j12 + + else + + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + FREE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + endif + + elseif(j1b_type .ge. 100) then + + PROVIDE int2_grad1_u12_square_ao_num + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + FREE int2_grad1_u12_square_ao_num + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/tc_integ_num.irp.f b/src/non_h_ints_mu/tc_integ_num.irp.f new file mode 100644 index 00000000..707d484c --- /dev/null +++ b/src/non_h_ints_mu/tc_integ_num.irp.f @@ -0,0 +1,190 @@ + + BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num , (ao_num,ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num, (ao_num,ao_num,n_points_final_grid) ] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! int2_grad1_u12_square_ao_num = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + integer :: n_blocks, n_rest, n_pass + integer :: i_blocks, i_rest, i_pass, ii + double precision :: time0, time1 + double precision, allocatable :: tmp(:,:,:) + double precision, allocatable :: tmp_grad1_u12(:,:,:), tmp_grad1_u12_squared(:,:) + + ! TODO + ! tmp_grad1_u12_squared get be obtained from tmp_grad1_u12 + + print*, ' providing int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num ...' + call wall_time(time0) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + ! n_points_final_grid = n_blocks * n_pass + n_rest + n_blocks = 4 + n_rest = int(mod(n_points_final_grid, n_blocks)) + n_pass = int((n_points_final_grid - n_rest) / n_blocks) + + if(n_pass .le. 1) then + print*, ' blocks are to large or grid is very small !' + stop + endif + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) & + !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & + !$OMP final_grid_points, tmp, int2_grad1_u12_ao_num, & + !$OMP int2_grad1_u12_square_ao_num) + !$OMP DO + do i_pass = 1, n_pass + ii = (i_pass-1)*n_blocks + 1 + + do i_blocks = 1, n_blocks + ipoint = ii - 1 + i_blocks ! r1 + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & + , tmp_grad1_u12(1,i_blocks,2) & + , tmp_grad1_u12(1,i_blocks,3) & + , tmp_grad1_u12_squared(1,i_blocks)) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num(1,1,ii,m), ao_num*ao_num) + enddo + call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) + + ! TODO + ! OPENMP + if(n_rest .ne. 0) then + + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) + allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) + + ii = n_pass*n_blocks + 1 + do i_rest = 1, n_rest + ipoint = ii - 1 + i_rest ! r1 + call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & + , tmp_grad1_u12(1,i_rest,2) & + , tmp_grad1_u12(1,i_rest,3) & + , tmp_grad1_u12_squared(1,i_rest)) + enddo + + do m = 1, 3 + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num(1,1,ii,m), ao_num*ao_num) + enddo + call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) + + deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) + endif + + deallocate(tmp) + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num & int2_grad1_u12_square_ao_num =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [double precision, int2_grad1_u12_ao_num_1shot , (ao_num,ao_num,n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao_num_1shot, (ao_num,ao_num,n_points_final_grid) ] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision, allocatable :: tmp(:,:,:) + + print*, ' providing int2_grad1_u12_ao_num_1shot & int2_grad1_u12_square_ao_num_1shot ...' + call wall_time(time0) + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num grad1_u12_squared_num + + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do m = 1, 3 + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) + enddo + FREE grad1_u12_num + + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao_num_1shot(1,1,1), ao_num*ao_num) + FREE grad1_u12_squared_num + + deallocate(tmp) + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao_num_1shot & int2_grad1_u12_square_ao_num_1shot =', time1-time0 + call print_memory_usage() + +END_PROVIDER + +! --- + diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index 7d84e73b..84674fa0 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -618,13 +618,14 @@ subroutine test_int2_grad1_u12_square_ao() I_old = int2_grad1_u12_square_ao_num_1shot(j,i,ipoint) I_new = int2_grad1_u12_square_ao (j,i,ipoint) + !I_new = int2_grad1_u12_square_ao_num (j,i,ipoint) diff = dabs(I_new-I_old) if(diff .gt. thr) then print *, ' problem on:', j, i, ipoint print *, ' old value :', I_old print *, ' new value :', I_new - stop + !stop endif accu += diff @@ -660,13 +661,14 @@ subroutine test_int2_grad1_u12_ao() do m = 1, 3 I_old = int2_grad1_u12_ao_num_1shot(j,i,ipoint,m) I_new = int2_grad1_u12_ao (j,i,ipoint,m) + !I_new = int2_grad1_u12_ao_num (j,i,ipoint,m) diff = dabs(I_new-I_old) if(diff .gt. thr) then print *, ' problem on:', j, i, ipoint, m print *, ' old value :', I_old print *, ' new value :', I_new - stop + !stop endif accu += diff diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 29c238e1..2d244586 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -274,4 +274,15 @@ doc: size of radial grid over r1 interface: ezfio,provider,ocaml default: 30 +[tc_grid2_a] +type: integer +doc: size of angular grid over r2 +interface: ezfio,provider,ocaml +default: 50 + +[tc_grid2_r] +type: integer +doc: size of radial grid over r2 +interface: ezfio,provider,ocaml +default: 30 From 7076fcd202442a94d37d3c1782a7908a0b637ffd Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 6 Sep 2023 19:21:14 +0200 Subject: [PATCH 276/337] normal order V2 FROZEN CORE --- src/becke_numerical_grid/grid_becke.irp.f | 3 + .../grid_becke_vector.irp.f | 4 +- src/non_h_ints_mu/tc_integ_an.irp.f | 2 - src/non_h_ints_mu/tc_integ_num.irp.f | 51 +- src/non_h_ints_mu/total_tc_int.irp.f | 8 +- src/tc_bi_ortho/normal_ordered.irp.f | 1594 ++++------------- src/tc_bi_ortho/normal_ordered_old.irp.f | 4 +- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 63 +- src/tc_keywords/EZFIO.cfg | 8 +- src/tc_scf/tc_scf.irp.f | 7 + 10 files changed, 419 insertions(+), 1325 deletions(-) diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 91fdc563..f72d452d 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -37,6 +37,9 @@ n_points_integration_angular = my_n_pt_a_grid endif + print*, " n_points_radial_grid = ", n_points_radial_grid + print*, " n_points_integration_angular = ", n_points_integration_angular + END_PROVIDER ! --- diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 0386f3c6..e1147ec7 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -20,8 +20,8 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo - print*,' n_points_final_grid = ', n_points_final_grid - print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + !print*,' n_points_final_grid = ', n_points_final_grid + !print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f index e2f181a0..ae7af987 100644 --- a/src/non_h_ints_mu/tc_integ_an.irp.f +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -108,7 +108,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f PROVIDE int2_grad1_u12_ao_num int2_grad1_u12_ao = int2_grad1_u12_ao_num - FREE int2_grad1_u12_ao_num else @@ -225,7 +224,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p PROVIDE int2_grad1_u12_square_ao_num int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num - FREE int2_grad1_u12_square_ao_num else diff --git a/src/non_h_ints_mu/tc_integ_num.irp.f b/src/non_h_ints_mu/tc_integ_num.irp.f index 707d484c..ee34f531 100644 --- a/src/non_h_ints_mu/tc_integ_num.irp.f +++ b/src/non_h_ints_mu/tc_integ_num.irp.f @@ -15,6 +15,7 @@ integer :: n_blocks, n_rest, n_pass integer :: i_blocks, i_rest, i_pass, ii double precision :: time0, time1 + double precision :: mem, n_double double precision, allocatable :: tmp(:,:,:) double precision, allocatable :: tmp_grad1_u12(:,:,:), tmp_grad1_u12_squared(:,:) @@ -41,31 +42,33 @@ enddo !$OMP END DO !$OMP END PARALLEL - + ! n_points_final_grid = n_blocks * n_pass + n_rest - n_blocks = 4 + call total_memory(mem) + mem = max(1.d0, qp_max_mem - mem) + n_double = mem * 1.d8 + n_blocks = min(n_double / (n_points_extra_final_grid * 4), 1.d0*n_points_final_grid) n_rest = int(mod(n_points_final_grid, n_blocks)) n_pass = int((n_points_final_grid - n_rest) / n_blocks) - - if(n_pass .le. 1) then - print*, ' blocks are to large or grid is very small !' - stop - endif + + call write_int(6, n_pass, 'Number of passes') + call write_int(6, n_blocks, 'Size of the blocks') + call write_int(6, n_rest, 'Size of the last block') + allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_blocks)) allocate(tmp_grad1_u12(n_points_extra_final_grid,n_blocks,3)) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i_pass, i_blocks, ipoint, ii, m, tmp_grad1_u12, & - !$OMP tmp_grad1_u12_squared) & - !$OMP SHARED (n_pass, n_blocks, n_points_extra_final_grid, ao_num, & - !$OMP final_grid_points, tmp, int2_grad1_u12_ao_num, & - !$OMP int2_grad1_u12_square_ao_num) - !$OMP DO do i_pass = 1, n_pass ii = (i_pass-1)*n_blocks + 1 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_blocks, ipoint) & + !$OMP SHARED (n_blocks, n_points_extra_final_grid, ii, & + !$OMP final_grid_points, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) + !$OMP DO do i_blocks = 1, n_blocks ipoint = ii - 1 + i_blocks ! r1 call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) & @@ -73,6 +76,8 @@ , tmp_grad1_u12(1,i_blocks,3) & , tmp_grad1_u12_squared(1,i_blocks)) enddo + !$OMP END DO + !$OMP END PARALLEL do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_blocks, n_points_extra_final_grid, 1.d0 & @@ -83,19 +88,23 @@ , tmp(1,1,1), n_points_extra_final_grid, tmp_grad1_u12_squared(1,1), n_points_extra_final_grid & , 0.d0, int2_grad1_u12_square_ao_num(1,1,ii), ao_num*ao_num) enddo - !$OMP END DO - !$OMP END PARALLEL deallocate(tmp_grad1_u12, tmp_grad1_u12_squared) - ! TODO - ! OPENMP - if(n_rest .ne. 0) then + if(n_rest .gt. 0) then allocate(tmp_grad1_u12_squared(n_points_extra_final_grid,n_rest)) allocate(tmp_grad1_u12(n_points_extra_final_grid,n_rest,3)) ii = n_pass*n_blocks + 1 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i_rest, ipoint) & + !$OMP SHARED (n_rest, n_points_extra_final_grid, ii, & + !$OMP final_grid_points, tmp_grad1_u12, & + !$OMP tmp_grad1_u12_squared) + !$OMP DO do i_rest = 1, n_rest ipoint = ii - 1 + i_rest ! r1 call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) & @@ -103,6 +112,8 @@ , tmp_grad1_u12(1,i_rest,3) & , tmp_grad1_u12_squared(1,i_rest)) enddo + !$OMP END DO + !$OMP END PARALLEL do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_rest, n_points_extra_final_grid, 1.d0 & diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 254defe1..9c19e0ac 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -55,12 +55,13 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao integer :: i, j, k, l double precision :: wall1, wall0 + PROVIDE j1b_type + print *, ' providing ao_tc_int_chemist ...' call wall_time(wall0) if(test_cycle_tc) then - PROVIDE j1b_type if(j1b_type .ne. 3) then print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type stop @@ -86,6 +87,11 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao FREE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul + if(j1b_type .ge. 100) then + FREE int2_grad1_u12_ao_num int2_grad1_u12_square_ao_num + endif + + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0 call print_memory_usage() diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index ca5875c9..1a9cddda 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -1,1040 +1,12 @@ ! --- -BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] - - BEGIN_DOC - ! Normal ordering of the three body interaction on the HF density - END_DOC - - use bitmasks ! you need to include the bitmasks_module.f90 features - - implicit none - - integer :: i, ii, h1, p1, h2, p2, ipoint - integer :: hh1, hh2, pp1, pp2 - integer :: Ne(2) - double precision :: wall0, wall1, walli, wallf - integer, allocatable :: occ(:,:) - integer(bit_kind), allocatable :: key_i_core(:,:) - - print*,' Providing normal_two_body_bi_orth_v0 ...' - call wall_time(walli) - - if(read_tc_norm_ord) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="read") - read(11) normal_two_body_bi_orth_v0 - close(11) - - else - - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) - double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) - double precision, allocatable :: tmp(:,:,:,:) - - PROVIDE N_int - - allocate( occ(N_int*bit_kind_size,2) ) - allocate( key_i_core(N_int,2) ) - - if(core_tc_op) then - do i = 1, N_int - key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) - key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) - enddo - call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) - else - call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) - endif - - allocate(tmp(mo_num,mo_num,mo_num,mo_num)) - - ! --- - ! aba contraction - - print*,' Providing aba_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmp_2d(mo_num,mo_num)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - - do h1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo !i - endif - - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmpval_1) - deallocate(tmpval_2) - deallocate(tmpvec_1) - deallocate(tmpvec_2) - deallocate(tmp_2d) - - tmp = -0.5d0 * tmp - call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) - - call wall_time(wall1) - print*,' Wall time for aba_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 = tmp - - ! --- - ! aab contraction - - print*,' Providing aab_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpvec_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1) - !$OMP DO - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, & - !$OMP mos_r_in_r_array_transp, & - !$OMP tmpval_1, tmp2) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - deallocate(tmp_2d) - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmpval_1) - deallocate(tmpvec_1) - - tmp = -0.5d0 * tmp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (tmp, mo_num) - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,' Wall time for aab_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 += tmp - - ! --- - ! aaa contraction - - if(Ne(2) .ge. 3) then - - print*,' Providing aaa_contraction ...' - call wall_time(wall0) - - tmp = 0.d0 - - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) - allocate(tmp1(n_points_final_grid,3,mo_num)) - allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) - allocate(tmpval_1(n_points_final_grid)) - allocate(tmpval_2(n_points_final_grid)) - allocate(tmpvec_1(n_points_final_grid,3)) - allocate(tmpvec_2(n_points_final_grid,3)) - allocate(tmpvec_3(n_points_final_grid,3)) - - ! purely closed shell part - do ii = 1, Ne(2) - i = occ(ii,2) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo ! i - - ! purely open-shell part - if(Ne(2) < Ne(1)) then - - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) - - ! to avoid tmp(N^4) - do h1 = 1, mo_num - - ! to minimize the number of operations - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2 ) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p1, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_1, tmpvec_1, tmp1) - !$OMP DO - do p1 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, h1, i, & - !$OMP mos_l_in_r_array_transp, int2_grad1_u12_bimo_t, & - !$OMP tmpval_2, tmpvec_2, tmp1) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(p1,h2,p2) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - !$OMP END PARALLEL DO - - ! to avoid tmp(N^4) - do p1 = 1, mo_num - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint) & - !$OMP SHARED (n_points_final_grid, i, h1, p1, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) - !$OMP DO - do ipoint = 1, n_points_final_grid - - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - - tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) - tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) - enddo - !$OMP END DO - !$OMP END PARALLEL - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, & - !$OMP mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmp1, tmp2, tmpval_1, tmpval_2, tmpvec_1) - !$OMP DO - do h2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p2, ipoint) & - !$OMP SHARED (mo_num, n_points_final_grid, i, h1, & - !$OMP int2_grad1_u12_bimo_t, & - !$OMP tmpvec_2, tmpvec_3, tmp2, tmp3) - !$OMP DO - do p2 = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - !$OMP PARALLEL DO PRIVATE(h2,p2) - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - !$OMP END PARALLEL DO - - enddo ! p1 - enddo ! h1 - enddo !i - endif - - deallocate(tmp_2d) - deallocate(tmp_3d) - deallocate(tmp1) - deallocate(tmp2) - deallocate(tmp3) - deallocate(tmpval_1) - deallocate(tmpval_2) - deallocate(tmpvec_1) - deallocate(tmpvec_2) - deallocate(tmpvec_3) - - tmp = -0.5d0 * tmp - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (h1, h2, p1, p2) & - !$OMP SHARED (tmp, mo_num) - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 1, mo_num - do p2 = p1, mo_num - tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num - do h2 = 1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) - enddo - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do h1 = 1, mo_num-1 - do h2 = h1+1, mo_num - do p1 = 2, mo_num - do p2 = 1, p1-1 - tmp(p2,h2,p1,h1) *= -1.d0 - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - call wall_time(wall1) - print*,' Wall time for aaa_contraction', wall1-wall0 - - normal_two_body_bi_orth_v0 += tmp - endif ! Ne(2) .ge. 3 - - deallocate(tmp) - - endif ! read_tc_norm_ord - - if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_v0', action="write") - call ezfio_set_work_empty(.False.) - write(11) normal_two_body_bi_orth_v0 - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(wallf) - print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli - -END_PROVIDER - -! --- - BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC + ! ! Normal ordering of the three body interaction on the HF density + ! END_DOC use bitmasks ! you need to include the bitmasks_module.f90 features @@ -1063,7 +35,45 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + allocate(int2_grad1_u12_bimo_t_tmp(n_points_final_grid,3,mo_num,mo_num)) + allocate(mos_l_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + allocate(mos_r_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, p1) & + !$OMP SHARED (mo_num, mo_class, & + !$OMP int2_grad1_u12_bimo_t, int2_grad1_u12_bimo_t_tmp, & + !$OMP mos_l_in_r_array_transp, mos_l_in_r_array_transp_tmp, & + !$OMP mos_r_in_r_array_transp, mos_r_in_r_array_transp_tmp) + !$OMP DO + do h1 = 1, mo_num + + mos_l_in_r_array_transp_tmp(:,h1) = 0.d0 + mos_r_in_r_array_transp_tmp(:,h1) = 0.d0 + + if(mo_class(h1) .ne. "Active") cycle + + mos_l_in_r_array_transp_tmp(:,h1) = mos_l_in_r_array_transp(:,h1) + mos_r_in_r_array_transp_tmp(:,h1) = mos_r_in_r_array_transp(:,h1) + + do p1 = 1, mo_num + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = int2_grad1_u12_bimo_t(:,:,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + PROVIDE mo_class PROVIDE N_int allocate( occ(N_int*bit_kind_size,2) ) @@ -1087,16 +97,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aba_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) @@ -1115,82 +126,87 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO @@ -1205,14 +221,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! purely open-shell part if(Ne(2) < Ne(1)) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & - !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) @@ -1231,82 +250,86 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint, i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i, i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint, i) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint, i) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1, i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2, i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3, i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2, i,h1) & - - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3, i,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo !i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_3d, tmp_2d) @@ -1331,16 +354,17 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aab_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & - !$OMP tmpval_1, tmpvec_1) & - !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1359,71 +383,76 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) enddo do h2 = 1, mo_num + if(mo_class(h2) .ne. "Active") cycle + tmp2(:,h2) = 0.d0 + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO @@ -1441,7 +470,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PRIVATE (h1, h2, p1, p2) & !$OMP SHARED (tmp, mo_num) - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num @@ -1453,7 +482,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 2, mo_num @@ -1465,7 +494,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -1491,17 +520,18 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aaa_contraction ...' call wall_time(wall0) - tmp = 0.d0 + call set_multiple_levels_omp(.false.) - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & - !$OMP tmpval_1, tmpval_2, & - !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1527,20 +557,23 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ tmpvec_3 = 0.d0 !$OMP DO - do ii = 1, Ne(2) - i = occ(ii,2) - do h1 = 1, mo_num + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) @@ -1548,69 +581,72 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) @@ -1618,9 +654,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) @@ -1632,35 +672,37 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & @@ -1670,15 +712,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo ! i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_2d) @@ -1697,15 +737,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ ! purely open-shell part if(Ne(2) < Ne(1)) then - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & - !$OMP tmpval_1, tmpval_2, & - !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) allocate(tmp_2d(mo_num,mo_num)) @@ -1732,20 +773,21 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP DO - do ii = Ne(2) + 1, Ne(1) - i = occ(ii,1) + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle - do h1 = 1, mo_num + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) @@ -1753,69 +795,72 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) enddo enddo call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , tmp1(1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - !$OMP END CRITICAL enddo enddo enddo do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & , 0.d0, tmp_3d(1,1,1), mo_num) do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - !$OMP END CRITICAL enddo enddo enddo do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) @@ -1823,9 +868,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) @@ -1837,35 +886,37 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + do ipoint = 1, n_points_final_grid - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t(ipoint,3,p2,h1) + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp(1,1), n_points_final_grid & + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & @@ -1875,15 +926,13 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ do h2 = 1, mo_num do p2 = 1, mo_num - !$OMP CRITICAL tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - !$OMP END CRITICAL enddo enddo enddo ! p1 - enddo ! h1 - enddo !i + enddo ! i + enddo ! h1 !$OMP END DO deallocate(tmp_2d) @@ -1907,7 +956,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PRIVATE (h1, h2, p1, p2) & !$OMP SHARED (tmp, mo_num) - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 1, mo_num @@ -1919,7 +968,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO COLLAPSE(2) do h1 = 1, mo_num do h2 = 1, mo_num do p1 = 2, mo_num @@ -1931,7 +980,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ enddo !$OMP END DO - !$OMP DO + !$OMP DO do h1 = 1, mo_num-1 do h2 = h1+1, mo_num do p1 = 2, mo_num @@ -1951,6 +1000,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ endif ! Ne(2) .ge. 3 deallocate(tmp) + deallocate(int2_grad1_u12_bimo_t_tmp, mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp) endif ! read_tc_norm_ord diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index 417580dd..e5e63355 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, if(read_tc_norm_ord) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="read") + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") read(11) normal_two_body_bi_orth_old close(11) @@ -103,7 +103,7 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_old, (mo_num, mo_num, endif if(write_tc_norm_ord.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth_old', action="write") + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") call ezfio_set_work_empty(.False.) write(11) normal_two_body_bi_orth_old close(11) diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index b6beb65b..3301fcc2 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -11,14 +11,17 @@ program tc_bi_ortho print *, 'Hello world' - my_grid_becke = .True. + my_grid_becke = .True. PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - read_wf = .True. - touch read_wf + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + +! read_wf = .True. +! touch read_wf ! call test_h_u0 ! call test_slater_tc_opt @@ -27,10 +30,12 @@ program tc_bi_ortho ! call timing_single ! call timing_double - call test_no() !call test_no_aba() !call test_no_aab() !call test_no_aaa() + + call test_no() + end subroutine test_h_u0 @@ -272,9 +277,9 @@ subroutine test_no() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm - print*, ' testing normal_two_body_bi_orth ...' + print*, ' test_no ...' thr = 1d-8 @@ -282,6 +287,7 @@ subroutine test_no() PROVIDE normal_two_body_bi_orth accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -289,8 +295,8 @@ subroutine test_no() new = normal_two_body_bi_orth (l,k,j,i) ref = normal_two_body_bi_orth_old(l,k,j,i) + contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on normal_two_body_bi_orth' print*, l, k, j, i @@ -298,14 +304,17 @@ subroutine test_no() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on normal_two_body_bi_orth = ', accu / dble(mo_num)**4 - return -end + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end subroutine test_no ! --- @@ -313,7 +322,7 @@ subroutine test_no_aba() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aba_contraction ...' @@ -323,6 +332,7 @@ subroutine test_no_aba() PROVIDE no_aba_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -331,7 +341,6 @@ subroutine test_no_aba() new = no_aba_contraction (l,k,j,i) ref = no_aba_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aba_contraction' print*, l, k, j, i @@ -339,13 +348,16 @@ subroutine test_no_aba() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aba_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- @@ -355,7 +367,7 @@ subroutine test_no_aab() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aab_contraction ...' @@ -365,6 +377,7 @@ subroutine test_no_aab() PROVIDE no_aab_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -373,7 +386,6 @@ subroutine test_no_aab() new = no_aab_contraction (l,k,j,i) ref = no_aab_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aab_contraction' print*, l, k, j, i @@ -381,13 +393,16 @@ subroutine test_no_aab() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aab_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- @@ -396,7 +411,7 @@ subroutine test_no_aaa() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm print*, ' testing no_aaa_contraction ...' @@ -406,6 +421,7 @@ subroutine test_no_aaa() PROVIDE no_aaa_contraction accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -414,7 +430,6 @@ subroutine test_no_aaa() new = no_aaa_contraction (l,k,j,i) ref = no_aaa_contraction_v0(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' print*, l, k, j, i @@ -422,13 +437,17 @@ subroutine test_no_aaa() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on no_aaa_contraction = ', accu / dble(mo_num)**4 - return + print*, ' accu (%) = ', 100.d0*accu/norm + + return end ! --- + diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 2d244586..a70ccc63 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -110,13 +110,13 @@ default: False type: Threshold doc: Threshold on the convergence of the Hartree Fock energy. interface: ezfio,provider,ocaml -default: 1.e-10 +default: 1.e-8 [n_it_tcscf_max] type: Strictly_positive_int doc: Maximum number of SCF iterations interface: ezfio,provider,ocaml -default: 100 +default: 50 [selection_tc] type: integer @@ -278,11 +278,11 @@ default: 30 type: integer doc: size of angular grid over r2 interface: ezfio,provider,ocaml -default: 50 +default: 194 [tc_grid2_r] type: integer doc: size of radial grid over r2 interface: ezfio,provider,ocaml -default: 30 +default: 50 diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 9bcbb13b..22f66484 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -18,6 +18,10 @@ program tc_scf my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call write_int(6, my_n_pt_r_grid, 'radial external grid over') + call write_int(6, my_n_pt_a_grid, 'angular external grid over') + + PROVIDE mu_erf print *, ' mu = ', mu_erf PROVIDE j1b_type @@ -30,6 +34,9 @@ program tc_scf my_n_pt_r_extra_grid = tc_grid2_r my_n_pt_a_extra_grid = tc_grid2_a touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') endif !call create_guess() From 8739c26509e07efa0586dc543f61f3920bd2e28a Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Wed, 6 Sep 2023 21:03:22 +0200 Subject: [PATCH 277/337] OPTIMZATIONS IN 4-ind integ --- src/bi_ort_ints/bi_ort_ints.irp.f | 35 +++-- src/bi_ort_ints/three_body_ijmk.irp.f | 183 +++++++++++++------------- 2 files changed, 116 insertions(+), 102 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index cac46b18..0349c731 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -18,10 +18,11 @@ program bi_ort_ints ! call test_5idx ! call test_5idx2 call test_4idx() - call test_4idx_n4() + !call test_4idx_n4() !call test_4idx2() !call test_5idx2 !call test_5idx + end subroutine test_5idx2 @@ -340,7 +341,7 @@ subroutine test_4idx() implicit none integer :: i, j, k, l - double precision :: accu, contrib, new, ref, thr + double precision :: accu, contrib, new, ref, thr, norm thr = 1d-10 @@ -348,6 +349,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_direct_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -356,7 +358,6 @@ subroutine test_4idx() new = three_e_4_idx_direct_bi_ort (l,k,j,i) ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_direct_bi_ort' print*, l, k, j, i @@ -364,11 +365,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_direct_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -376,6 +380,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_exch13_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -384,7 +389,6 @@ subroutine test_4idx() new = three_e_4_idx_exch13_bi_ort (l,k,j,i) ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_exch13_bi_ort' print*, l, k, j, i @@ -392,11 +396,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_exch13_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -404,6 +411,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_cycle_1_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -412,7 +420,6 @@ subroutine test_4idx() new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i) ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_cycle_1_bi_ort' print*, l, k, j, i @@ -420,11 +427,14 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_cycle_1_bi_ort (%) = ', 100.d0 * accu / norm ! --- @@ -432,6 +442,7 @@ subroutine test_4idx() PROVIDE three_e_4_idx_exch23_bi_ort accu = 0.d0 + norm = 0.d0 do i = 1, mo_num do j = 1, mo_num do k = 1, mo_num @@ -440,7 +451,6 @@ subroutine test_4idx() new = three_e_4_idx_exch23_bi_ort (l,k,j,i) ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i) contrib = dabs(new - ref) - accu += contrib if(contrib .gt. thr) then print*, ' problem in three_e_4_idx_exch23_bi_ort' print*, l, k, j, i @@ -448,13 +458,18 @@ subroutine test_4idx() stop endif + accu += contrib + norm += dabs(ref) enddo enddo enddo enddo - print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4 + + print*, ' accu on three_e_4_idx_exch23_bi_ort (%) = ', 100.d0 * accu / norm ! --- return end + + diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 669861b7..742d5a80 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -64,120 +64,117 @@ !$OMP END DO !$OMP END PARALLEL + + + + + ! loops approach to break the O(N^4) scaling in memory + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2, tmp_2d, tmp1, tmp2) & + !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_aux_1, tmp_aux_2, & + !$OMP three_e_4_idx_direct_bi_ort, three_e_4_idx_exch13_bi_ort, & + !$OMP three_e_4_idx_exch23_bi_ort, three_e_4_idx_cycle_1_bi_ort) + allocate(tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,4,mo_num)) allocate(tmp2(n_points_final_grid,4,mo_num)) - ! loops approach to break the O(N^4) scaling in memory + !$OMP DO do k = 1, mo_num + + ! --- + do i = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, i, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp_aux_2, tmp1) - !$OMP DO - do n = 1, mo_num - do ipoint = 1, n_points_final_grid + ! --- - tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) - tmp_loc_2 = tmp_aux_2(ipoint,n) + do n = 1, mo_num + do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 - tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 - tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 - tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & - + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) + tmp_loc_2 = tmp_aux_2(ipoint,n) + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i) + + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO + ! --- - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, i, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2) - !$OMP DO - do n = 1, mo_num - do ipoint = 1, n_points_final_grid - - tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) - tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) - - tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 - tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 - tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 - tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & - + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & - + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) - - tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) - tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) - tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) - tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) + do n = 1, mo_num + do ipoint = 1, n_points_final_grid + + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) + tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) + + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 + tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 + tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 + tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) + + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) + tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) + tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) + tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n) + enddo enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + ! --- - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO - call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + ! --- - !$OMP PARALLEL DO PRIVATE(j,m) - do j = 1, mo_num - do m = 1, mo_num - three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do j = 1, mo_num + do m = 1, mo_num + three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j) + enddo enddo - enddo - !$OMP END PARALLEL DO - enddo ! i + ! --- + + enddo ! i + + ! --- do j = 1, mo_num - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) & - !$OMP SHARED (mo_num, n_points_final_grid, j, k, & - !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & - !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & - !$OMP tmp1, tmp2) - !$OMP DO do n = 1, mo_num do ipoint = 1, n_points_final_grid @@ -197,31 +194,33 @@ tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) enddo enddo - !$OMP END DO - !$OMP END PARALLEL call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 & , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid & , 0.d0, tmp_2d(1,1), mo_num) - !$OMP PARALLEL DO PRIVATE(i,m) do i = 1, mo_num do m = 1, mo_num three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i) enddo enddo - !$OMP END PARALLEL DO enddo ! j + + ! --- + enddo !k + !$OMP END DO deallocate(tmp_2d) deallocate(tmp1) deallocate(tmp2) + + !$OMP END PARALLEL + deallocate(tmp_aux_1) deallocate(tmp_aux_2) - call wall_time(wall1) print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 call print_memory_usage() From 3270602419ece18431da2f0eebd4ec74faea10a8 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Thu, 7 Sep 2023 21:13:57 +0200 Subject: [PATCH 278/337] REDUCED NB OF DGEMM IN NORMAL_OREDER TABLE --- .../grid_becke_vector.irp.f | 4 +- src/tc_bi_ortho/normal_ordered.irp.f | 844 ++++++-------- src/tc_bi_ortho/normal_ordered_v0.irp.f | 1022 +++++++++++++++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 49 +- 4 files changed, 1439 insertions(+), 480 deletions(-) create mode 100644 src/tc_bi_ortho/normal_ordered_v0.irp.f diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index e1147ec7..0386f3c6 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -20,8 +20,8 @@ BEGIN_PROVIDER [integer, n_points_final_grid] enddo enddo - !print*,' n_points_final_grid = ', n_points_final_grid - !print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) + print*,' n_points_final_grid = ', n_points_final_grid + print*,' n max point = ', n_points_integration_angular*(n_points_radial_grid*nucl_num - 1) call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 1a9cddda..e65df450 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -13,13 +13,18 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ implicit none - integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: i, ii, ipoint + integer :: h1, p1, h2, p2 integer :: hh1, hh2, pp1, pp2 integer :: Ne(2) double precision :: wall0, wall1, walli, wallf integer, allocatable :: occ(:,:) integer(bit_kind), allocatable :: key_i_core(:,:) + PROVIDE mo_class + PROVIDE list_act n_act_orb + PROVIDE N_int + print*,' Providing normal_two_body_bi_orth ...' call wall_time(walli) @@ -31,8 +36,8 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ else - double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) - double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:) double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) double precision, allocatable :: tmp(:,:,:,:) double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) @@ -72,10 +77,6 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP END DO !$OMP END PARALLEL - - PROVIDE mo_class - PROVIDE N_int - allocate( occ(N_int*bit_kind_size,2) ) allocate( key_i_core(N_int,2) ) @@ -97,120 +98,98 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aba_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, h1, p1, h2, p2, i, ii, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - tmp_3d = 0.d0 - tmp_2d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - !$OMP DO + do hh1 = 1, n_act_orb + h1 = list_act(hh1) - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle - + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp1 = 1, n_act_orb + p1 = list_act(pp1) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo - do h2 = 1, mo_num - tmp2(:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d, tmp_2d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -225,114 +204,93 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & - !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, h1, p1, h2, p2, i, ii, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) - Tmp_3d = 0.d0 - Tmp_2d = 0.d0 - Tmp1 = 0.d0 - Tmp2 = 0.d0 - Tmpval_1 = 0.d0 - Tmpval_2 = 0.d0 - Tmpvec_1 = 0.d0 - Tmpvec_2 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & - + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & - - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & - - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) enddo - do h2 = 1, mo_num - tmp2(:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d, tmp_2d) deallocate(tmp1, tmp2) deallocate(tmpval_1, tmpval_2) deallocate(tmpvec_1, tmpvec_2) @@ -354,109 +312,89 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aab_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP PRIVATE (ipoint, hh1, pp1, hh2, pp2, ii, i, h1, p1, h2, p2, tmp1, tmp2, & !$OMP tmpval_1, tmpvec_1) & - !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) - allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmpval_1 = 0.d0 - tmpvec_1 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) - - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle - - do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) enddo - do h2 = 1, mo_num - if(mo_class(h2) .ne. "Active") cycle - tmp2(:,h2) = 0.d0 - + do hh2 = 1, n_act_orb + h2 = list_act(hh2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_3d) deallocate(tmp1, tmp2) deallocate(tmpval_1) deallocate(tmpvec_1) @@ -520,212 +458,189 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ print*,' Providing aaa_contraction ...' call wall_time(wall0) + tmp = 0.d0 + call set_multiple_levels_omp(.false.) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & - !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP PRIVATE (ipoint, i, ii, hh1, hh2, pp1, pp2, h1, h2, p1, p2, tmp1, tmp2, tmp_3d, & !$OMP tmpval_1, tmpval_2, & !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) allocate(tmpvec_3(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmp3 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - tmpvec_3 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - tmp(:,:,:,h1) = 0.d0 - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + tmp1 = 0.d0 do ii = 1, Ne(2) i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) enddo enddo - enddo - do p2 = 1, mo_num - tmp1(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + enddo ! ii - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + tmp1 = 0.d0 + tmp2 = 0.d0 + do ii = 1, Ne(2) + i = occ(ii,2) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) enddo - do h2 = 1, mo_num - tmp2( :,h2) = 0.d0 - tmp1(:,:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + tmp2(ipoint,p2) = tmp2(ipoint,p2) + int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,p2) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,p2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,p2) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,h1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do p2 = 1, mo_num - tmp2( :,p2) = 0.d0 - tmp3(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_2d) deallocate(tmp_3d) deallocate(tmp1) deallocate(tmp2) - deallocate(tmp3) deallocate(tmpval_1) deallocate(tmpval_2) deallocate(tmpvec_1) @@ -741,41 +656,29 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP PRIVATE (ipoint, i, ii, hh1, hh2, pp1, pp2, h1, h2, p1, p2, tmp_3d, tmp1, tmp2, & !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & - !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, n_act_orb, list_act, & !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & - !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP final_weight_at_r_vector, int2_grad1_u12_bimo_t_tmp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & !$OMP tmp) - allocate(tmp_2d(mo_num,mo_num)) allocate(tmp_3d(mo_num,mo_num,mo_num)) allocate(tmp1(n_points_final_grid,3,mo_num)) allocate(tmp2(n_points_final_grid,mo_num)) - allocate(tmp3(n_points_final_grid,3,mo_num)) allocate(tmpval_1(n_points_final_grid)) allocate(tmpval_2(n_points_final_grid)) allocate(tmpvec_1(n_points_final_grid,3)) allocate(tmpvec_2(n_points_final_grid,3)) allocate(tmpvec_3(n_points_final_grid,3)) - tmp_2d = 0.d0 - tmp_3d = 0.d0 - tmp1 = 0.d0 - tmp2 = 0.d0 - tmp3 = 0.d0 - tmpval_1 = 0.d0 - tmpval_2 = 0.d0 - tmpvec_1 = 0.d0 - tmpvec_2 = 0.d0 - tmpvec_3 = 0.d0 - !$OMP DO - do h1 = 1, mo_num - if(mo_class(h1) .ne. "Active") cycle + do hh1 = 1, n_act_orb + h1 = list_act(hh1) + tmp1 = 0.d0 do ii = Ne(2) + 1, Ne(1) i = occ(ii,1) @@ -783,163 +686,150 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = tmp1(ipoint,1,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = tmp1(ipoint,2,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = tmp1(ipoint,3,p1) + mos_l_in_r_array_transp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) + enddo + enddo + + enddo ! ii + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,1,h1), mo_num*mo_num) + + tmp1 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,h1) tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) enddo - do p1 = 1, mo_num - tmp1(:,:,p1) = 0.d0 - if(mo_class(p1) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp(ipoint,p2) * tmpvec_2(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + enddo ! ii - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do pp1 = 1, n_act_orb + p1 = list_act(pp1) + + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t(ipoint,3,p1,h1) ) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp(ipoint,h1) + enddo + + do hh2 = 1, n_act_orb + h2 = list_act(hh2) + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = tmp2(ipoint,h2) + mos_r_in_r_array_transp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) enddo enddo - enddo - do p2 = 1, mo_num - tmp1(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + enddo ! ii - do ipoint = 1, n_points_final_grid - tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) - tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) - tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp1(1,1,1), 3*n_points_final_grid & - , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & - , 0.d0, tmp_3d(1,1,1), mo_num) - do p1 = 1, mo_num - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) - enddo - enddo - enddo - - do p1 = 1, mo_num - if(mo_class(p1) .ne. "Active") cycle + tmp1 = 0.d0 + tmp2 = 0.d0 + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) do ipoint = 1, n_points_final_grid - tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & - ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & - + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) - - tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) - - tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) - tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp(ipoint,p1) tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) enddo - do h2 = 1, mo_num - tmp2( :,h2) = 0.d0 - tmp1(:,:,h2) = 0.d0 - if(mo_class(h2) .ne. "Active") cycle - + do pp2 = 1, n_act_orb + p2 = list_act(pp2) do ipoint = 1, n_points_final_grid - - tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & - + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) - - tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) - tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) - tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) - + tmp1(ipoint,1,p2) = tmp1(ipoint,1,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,p2) + tmp1(ipoint,2,p2) = tmp1(ipoint,2,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,p2) + tmp1(ipoint,3,p2) = tmp1(ipoint,3,p2) + tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,p2) + tmp2(ipoint,p2) = tmp2(ipoint,p2) + int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) enddo enddo - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & - , tmp2(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) + enddo ! ii - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do p2 = 1, mo_num - tmp2( :,p2) = 0.d0 - tmp3(:,:,p2) = 0.d0 - if(mo_class(p2) .ne. "Active") cycle + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,h1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp(1,1,p1,h1), mo_num) - do ipoint = 1, n_points_final_grid - - tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & - + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & - + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) - - tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) - tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) - tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) - enddo - enddo - - call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & - , tmp2(1,1), n_points_final_grid & - , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & - , 0.d0, tmp_2d(1,1), mo_num) - - call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & - , tmp3(1,1,1), 3*n_points_final_grid & - , tmp1(1,1,1), 3*n_points_final_grid & - , 1.d0, tmp_2d(1,1), mo_num) - - do h2 = 1, mo_num - do p2 = 1, mo_num - tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) - enddo - enddo - - enddo ! p1 - enddo ! i + enddo ! p1 enddo ! h1 !$OMP END DO - deallocate(tmp_2d) deallocate(tmp_3d) deallocate(tmp1) deallocate(tmp2) - deallocate(tmp3) deallocate(tmpval_1) deallocate(tmpval_2) deallocate(tmpvec_1) diff --git a/src/tc_bi_ortho/normal_ordered_v0.irp.f b/src/tc_bi_ortho/normal_ordered_v0.irp.f new file mode 100644 index 00000000..784af9db --- /dev/null +++ b/src/tc_bi_ortho/normal_ordered_v0.irp.f @@ -0,0 +1,1022 @@ + +! --- + +BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_v0, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! Normal ordering of the three body interaction on the HF density + ! + END_DOC + + use bitmasks ! you need to include the bitmasks_module.f90 features + + implicit none + + integer :: i, ii, h1, p1, h2, p2, ipoint + integer :: hh1, hh2, pp1, pp2 + integer :: Ne(2) + double precision :: wall0, wall1, walli, wallf + integer, allocatable :: occ(:,:) + integer(bit_kind), allocatable :: key_i_core(:,:) + + PROVIDE mo_class + PROVIDE N_int + + print*,' Providing normal_two_body_bi_orth_v0 ...' + call wall_time(walli) + + if(read_tc_norm_ord) then + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="read") + read(11) normal_two_body_bi_orth_v0 + close(11) + + else + + double precision, allocatable :: tmp_2d(:,:), tmp_3d(:,:,:) + double precision, allocatable :: tmp1(:,:,:), tmp2(:,:), tmp3(:,:,:) + double precision, allocatable :: tmpval_1(:), tmpval_2(:), tmpvec_1(:,:), tmpvec_2(:,:), tmpvec_3(:,:) + double precision, allocatable :: tmp(:,:,:,:) + double precision, allocatable :: int2_grad1_u12_bimo_t_tmp(:,:,:,:), mos_l_in_r_array_transp_tmp(:,:), mos_r_in_r_array_transp_tmp(:,:) + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + allocate(int2_grad1_u12_bimo_t_tmp(n_points_final_grid,3,mo_num,mo_num)) + allocate(mos_l_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + allocate(mos_r_in_r_array_transp_tmp(n_points_final_grid,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, p1) & + !$OMP SHARED (mo_num, mo_class, & + !$OMP int2_grad1_u12_bimo_t, int2_grad1_u12_bimo_t_tmp, & + !$OMP mos_l_in_r_array_transp, mos_l_in_r_array_transp_tmp, & + !$OMP mos_r_in_r_array_transp, mos_r_in_r_array_transp_tmp) + !$OMP DO + do h1 = 1, mo_num + + mos_l_in_r_array_transp_tmp(:,h1) = 0.d0 + mos_r_in_r_array_transp_tmp(:,h1) = 0.d0 + + if(mo_class(h1) .ne. "Active") cycle + + mos_l_in_r_array_transp_tmp(:,h1) = mos_l_in_r_array_transp(:,h1) + mos_r_in_r_array_transp_tmp(:,h1) = mos_r_in_r_array_transp(:,h1) + + do p1 = 1, mo_num + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + int2_grad1_u12_bimo_t_tmp(:,:,p1,h1) = int2_grad1_u12_bimo_t(:,:,p1,h1) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + allocate( occ(N_int*bit_kind_size,2) ) + allocate( key_i_core(N_int,2) ) + + if(core_tc_op) then + do i = 1, N_int + key_i_core(i,1) = xor(ref_bitmask(i,1), core_bitmask(i,1)) + key_i_core(i,2) = xor(ref_bitmask(i,2), core_bitmask(i,2)) + enddo + call bitstring_to_list_ab(key_i_core, occ, Ne, N_int) + else + call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) + endif + + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + ! --- + ! aba contraction + + print*,' Providing aba_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + tmp_3d = 0.d0 + tmp_2d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + + ! TODO: active electrons + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) + enddo + + do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, h1, p1, h2, p2, i, ii, & + !$OMP tmp_3d, tmp_2d, tmp1, tmp2, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + Allocate(tmp_3d(mo_num,mo_num,mo_num), tmp_2d(mo_num,mo_num)) + Allocate(tmp1(n_points_final_grid,3,mo_num), tmp2(n_points_final_grid,mo_num)) + Allocate(tmpval_1(n_points_final_grid), tmpval_2(n_points_final_grid)) + Allocate(tmpvec_1(n_points_final_grid,3), tmpvec_2(n_points_final_grid,3)) + + Tmp_3d = 0.d0 + Tmp_2d = 0.d0 + Tmp1 = 0.d0 + Tmp2 = 0.d0 + Tmpval_1 = 0.d0 + Tmpval_2 = 0.d0 + Tmpvec_1 = 0.d0 + Tmpvec_2 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,1) - tmpvec_2(ipoint,1)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,2) - tmpvec_2(ipoint,2)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * (tmpvec_1(ipoint,3) - tmpvec_2(ipoint,3)) & + + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) - tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) & + - int2_grad1_u12_bimo_t(ipoint,1,p1,i) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,2,p1,i) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) & + - int2_grad1_u12_bimo_t(ipoint,3,p1,i) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) ) + enddo + + do h2 = 1, mo_num + tmp2(:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_3d, tmp_2d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1, tmpval_2) + deallocate(tmpvec_1, tmpvec_2) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + call sum_A_At(tmp(1,1,1,1), mo_num*mo_num) + + call wall_time(wall1) + print*,' Wall time for aba_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 = tmp + + ! --- + ! aab contraction + + print*,' Providing aab_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, ii, i, h1, p1, h2, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, & + !$OMP tmpval_1, tmpvec_1) & + !$OMP SHARED (n_points_final_grid, mo_num, Ne, occ, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmpval_1 = 0.d0 + tmpvec_1 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + enddo + + do h2 = 1, mo_num + if(mo_class(h2) .ne. "Active") cycle + tmp2(:,h2) = 0.d0 + + do ipoint = 1, n_points_final_grid + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + + !$OMP END DO + + deallocate(tmp_3d) + deallocate(tmp1, tmp2) + deallocate(tmpval_1) + deallocate(tmpvec_1) + + !$OMP END PARALLEL + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aab_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + + ! --- + ! aaa contraction + + if(Ne(2) .ge. 3) then + + print*,' Providing aaa_contraction_v0 ...' + call wall_time(wall0) + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, & + !$OMP tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, & + !$OMP tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + tmp(:,:,:,h1) = 0.d0 + if(mo_class(h1) .ne. "Active") cycle + + do ii = 1, Ne(2) + i = occ(ii,2) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 1.d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + + ! purely open-shell part + if(Ne(2) < Ne(1)) then + + call set_multiple_levels_omp(.false.) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, ii, h1, h2, p1, p2, tmp_2d, tmp_3d, tmp1, tmp2, tmp3, & + !$OMP tmpval_1, tmpval_2, tmpvec_1, tmpvec_2, tmpvec_3) & + !$OMP SHARED (n_points_final_grid, Ne, occ, mo_num, mo_class, & + !$OMP mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp, & + !$OMP int2_grad1_u12_bimo_t_tmp, final_weight_at_r_vector, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp) + + allocate(tmp_2d(mo_num,mo_num)) + allocate(tmp_3d(mo_num,mo_num,mo_num)) + allocate(tmp1(n_points_final_grid,3,mo_num)) + allocate(tmp2(n_points_final_grid,mo_num)) + allocate(tmp3(n_points_final_grid,3,mo_num)) + allocate(tmpval_1(n_points_final_grid)) + allocate(tmpval_2(n_points_final_grid)) + allocate(tmpvec_1(n_points_final_grid,3)) + allocate(tmpvec_2(n_points_final_grid,3)) + allocate(tmpvec_3(n_points_final_grid,3)) + + tmp_2d = 0.d0 + tmp_3d = 0.d0 + tmp1 = 0.d0 + tmp2 = 0.d0 + tmp3 = 0.d0 + tmpval_1 = 0.d0 + tmpval_2 = 0.d0 + tmpvec_1 = 0.d0 + tmpvec_2 = 0.d0 + tmpvec_3 = 0.d0 + + !$OMP DO + + do h1 = 1, mo_num + if(mo_class(h1) .ne. "Active") cycle + + do ii = Ne(2) + 1, Ne(1) + i = occ(ii,1) + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_r_in_r_array_transp(ipoint,i) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_r_in_r_array_transp(ipoint,i) + enddo + + do p1 = 1, mo_num + tmp1(:,:,p1) = 0.d0 + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,1) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) + tmp1(ipoint,2,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,2) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) + tmp1(ipoint,3,p1) = mos_l_in_r_array_transp_tmp(ipoint,p1) * tmpvec_1(ipoint,3) + tmpval_1(ipoint) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num*mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,h2,p1) + enddo + enddo + enddo + + do p2 = 1, mo_num + tmp1(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,1) + tmp1(ipoint,2,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,2) + tmp1(ipoint,3,p2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p2,i) + mos_l_in_r_array_transp_tmp(ipoint,p2) * tmpvec_2(ipoint,3) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num*mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1), 3*n_points_final_grid & + , int2_grad1_u12_bimo_t_tmp(1,1,1,1), 3*n_points_final_grid & + , 0.d0, tmp_3d(1,1,1), mo_num) + + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_3d(p2,p1,h2) + enddo + enddo + enddo + + do p1 = 1, mo_num + if(mo_class(p1) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmpval_1(ipoint) = final_weight_at_r_vector(ipoint) * & + ( int2_grad1_u12_bimo_t(ipoint,1,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,1,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,2,p1,h1) & + + int2_grad1_u12_bimo_t(ipoint,3,i,i) * int2_grad1_u12_bimo_t_tmp(ipoint,3,p1,h1) ) + + tmpval_2(ipoint) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp_tmp(ipoint,p1) * mos_r_in_r_array_transp(ipoint,i) + + tmpvec_1(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + tmpvec_1(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_r_in_r_array_transp_tmp(ipoint,h1) + + tmpvec_2(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + tmpvec_2(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h1) * mos_l_in_r_array_transp_tmp(ipoint,p1) + + tmpvec_3(ipoint,1) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,2) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p1,i) * mos_l_in_r_array_transp(ipoint,i) + tmpvec_3(ipoint,3) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p1,i) * mos_l_in_r_array_transp(ipoint,i) + enddo + + do h2 = 1, mo_num + tmp2( :,h2) = 0.d0 + tmp1(:,:,h2) = 0.d0 + if(mo_class(h2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,h2) = mos_r_in_r_array_transp_tmp(ipoint,h2) * tmpval_1(ipoint) & + + int2_grad1_u12_bimo_t(ipoint,1,i,h2) * tmpvec_1(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,i,h2) * tmpvec_1(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,i,h2) * tmpvec_1(ipoint,3) + + tmp1(ipoint,1,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,h2) + tmp1(ipoint,2,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,h2) + tmp1(ipoint,3,h2) = tmpval_2(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,h2) + + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , mos_l_in_r_array_transp_tmp(1,1), n_points_final_grid & + , tmp2(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + do p2 = 1, mo_num + tmp2( :,p2) = 0.d0 + tmp3(:,:,p2) = 0.d0 + if(mo_class(p2) .ne. "Active") cycle + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,p2) = int2_grad1_u12_bimo_t(ipoint,1,p2,i) * tmpvec_2(ipoint,1) + int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) * tmpvec_3(ipoint,1) & + + int2_grad1_u12_bimo_t(ipoint,2,p2,i) * tmpvec_2(ipoint,2) + int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) * tmpvec_3(ipoint,2) & + + int2_grad1_u12_bimo_t(ipoint,3,p2,i) * tmpvec_2(ipoint,3) + int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) * tmpvec_3(ipoint,3) + + tmp3(ipoint,1,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,1,p2,h1) + tmp3(ipoint,2,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,2,p2,h1) + tmp3(ipoint,3,p2) = int2_grad1_u12_bimo_t_tmp(ipoint,3,p2,h1) + enddo + enddo + + call dgemm( 'T', 'N', mo_num, mo_num, n_points_final_grid, 0.5d0 & + , tmp2(1,1), n_points_final_grid & + , mos_r_in_r_array_transp_tmp(1,1), n_points_final_grid & + , 0.d0, tmp_2d(1,1), mo_num) + + call dgemm( 'T', 'N', mo_num, mo_num, 3*n_points_final_grid, 0.5d0 & + , tmp3(1,1,1), 3*n_points_final_grid & + , tmp1(1,1,1), 3*n_points_final_grid & + , 1.d0, tmp_2d(1,1), mo_num) + + do h2 = 1, mo_num + do p2 = 1, mo_num + tmp(p2,h2,p1,h1) = tmp(p2,h2,p1,h1) + tmp_2d(p2,h2) + enddo + enddo + + enddo ! p1 + enddo ! i + enddo ! h1 + !$OMP END DO + + deallocate(tmp_2d) + deallocate(tmp_3d) + deallocate(tmp1) + deallocate(tmp2) + deallocate(tmp3) + deallocate(tmpval_1) + deallocate(tmpval_2) + deallocate(tmpvec_1) + deallocate(tmpvec_2) + deallocate(tmpvec_3) + + !$OMP END PARALLEL + endif + + tmp = -0.5d0 * tmp + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (h1, h2, p1, p2) & + !$OMP SHARED (tmp, mo_num) + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 1, mo_num + do p2 = p1, mo_num + tmp(p2,h2,p1,h1) -= tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO COLLAPSE(2) + do h1 = 1, mo_num + do h2 = 1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) = -tmp(p1,h2,p2,h1) + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do h1 = 1, mo_num-1 + do h2 = h1+1, mo_num + do p1 = 2, mo_num + do p2 = 1, p1-1 + tmp(p2,h2,p1,h1) *= -1.d0 + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print*,' Wall time for aaa_contraction_v0', wall1-wall0 + + normal_two_body_bi_orth_v0 += tmp + endif ! Ne(2) .ge. 3 + + deallocate(tmp) + deallocate(int2_grad1_u12_bimo_t_tmp, mos_l_in_r_array_transp_tmp, mos_r_in_r_array_transp_tmp) + + endif ! read_tc_norm_ord + + if(write_tc_norm_ord.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/normal_two_body_bi_orth', action="write") + call ezfio_set_work_empty(.False.) + write(11) normal_two_body_bi_orth_v0 + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(wallf) + print*,' Wall time for normal_two_body_bi_orth_v0 ', wallf-walli + +END_PROVIDER + +! --- + diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 3301fcc2..d25a1f70 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -34,7 +34,8 @@ program tc_bi_ortho !call test_no_aab() !call test_no_aaa() - call test_no() + !call test_no() + call test_no_v0() end @@ -273,6 +274,52 @@ end ! --- +subroutine test_no_v0() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' test_no_v0 ...' + + thr = 1d-8 + + PROVIDE normal_two_body_bi_orth_v0 + PROVIDE normal_two_body_bi_orth + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = normal_two_body_bi_orth (l,k,j,i) + ref = normal_two_body_bi_orth_v0(l,k,j,i) + + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on normal_two_body_bi_orth' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end subroutine test_no + +! --- + + subroutine test_no() implicit none From a88730f10f90795e69253dd57255a234a69501ae Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 11:57:38 +0200 Subject: [PATCH 279/337] NO-L0 V0 --- src/tc_bi_ortho/no_dressing_naive.irp.f | 92 +++++++++++++++++++++ src/tc_bi_ortho/no_dressing_v0.irp.f | 105 ++++++++++++++++++++++++ src/tc_bi_ortho/tc_utils.irp.f | 22 +++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 26 +++++- 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 src/tc_bi_ortho/no_dressing_naive.irp.f create mode 100644 src/tc_bi_ortho/no_dressing_v0.irp.f diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f new file mode 100644 index 00000000..10c80cec --- /dev/null +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -0,0 +1,92 @@ + +! --- + +BEGIN_PROVIDER [double precision, no_0_naive] + + implicit none + integer :: ii, jj, kk + integer :: i, j, k + double precision :: sigma_i, sigma_j, sigma_k + double precision :: tmp + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj + double precision :: t0, t1 + logical, external :: is_same_spin + + print*, " Providing no_0_naive ..." + call wall_time(t0) + + + tmp = 0.d0 + do ii = 1, elec_num + + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1.d0 + endif + + do kk = 1, elec_num + + if(kk .le. elec_beta_num) then + k = kk + sigma_k = -1.d0 + else + k = kk - elec_beta_num + sigma_k = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , i, sigma_i, j, sigma_j, k, sigma_k & + , I_ijk_ijk) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , k, sigma_k, i, sigma_i, j, sigma_j & + , I_ijk_kij) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , j, sigma_j, k, sigma_k, i, sigma_i & + , I_ijk_jki) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , j, sigma_j, i, sigma_i, k, sigma_k & + , I_ijk_jik) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , k, sigma_k, j, sigma_j, i, sigma_i & + , I_ijk_kji) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, j, sigma_j, k, sigma_k & + , i, sigma_i, k, sigma_k, j, sigma_j & + , I_ijk_ikj) + + + tmp = tmp + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj + !tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo + enddo + enddo + + no_0_naive = -1.d0 * (-tmp) / 6.d0 + + call wall_time(t1) + print*, " Wall time for no_0_naive (sec) = ", (t1 - t0)/60.d0 + + print*, " no_0_naive = ", no_0_naive + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f new file mode 100644 index 00000000..9a070dab --- /dev/null +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -0,0 +1,105 @@ + +! --- + +BEGIN_PROVIDER [double precision, no_0_v0] + + implicit none + integer :: i, j, k + double precision :: tmp + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_0_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + tmp = 0.d0 + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + + no_0_v0 = -1.d0 * (-tmp) / 6.d0 + + else + + tmp = 0.d0 + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo + enddo + enddo + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp = tmp + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + enddo + + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp = tmp + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + enddo + + enddo + enddo + + no_0_v0 = -1.d0 * (-tmp) / 6.d0 + + endif + + call wall_time(t1) + print*, " Wall time for no_0_v0 (sec) = ", (t1 - t0)/60.d0 + + print*, " no_0_v0 = ", no_0_v0 + +END_PROVIDER + +! --- + + diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index 53fe5884..ba5ffff9 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -66,3 +66,25 @@ end ! --- + +logical function is_same_spin(sigma_1, sigma_2) + + BEGIN_DOC + ! + ! true if sgn(sigma_1) = sgn(sigma_2) + ! + END_DOC + + implicit none + double precision, intent(in) :: sigma_1, sigma_2 + + if((sigma_1 * sigma_2) .gt. 0.d0) then + is_same_spin = .true. + else + is_same_spin = .false. + endif + +end function is_same_spin + +! --- + diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index d25a1f70..226854ed 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -35,7 +35,9 @@ program tc_bi_ortho !call test_no_aaa() !call test_no() - call test_no_v0() + !call test_no_v0() + + call test_no_0() end @@ -315,7 +317,7 @@ subroutine test_no_v0() print*, ' accu (%) = ', 100.d0*accu/norm return -end subroutine test_no +end subroutine test_no_0 ! --- @@ -498,3 +500,23 @@ end ! --- +subroutine test_no_0() + + implicit none + double precision :: accu, norm + + print*, ' testing test_no_0 ...' + + PROVIDE no_0_naive + PROVIDE no_0_v0 + + accu = dabs(no_0_naive - no_0_v0) + norm = dabs(no_0_naive) + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + From e288c40d8b117b17b87db4f75e1aff902a7fa0c7 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 16:52:58 +0200 Subject: [PATCH 280/337] NO: L1 & L2 added and tested --- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 54 +++ src/tc_bi_ortho/no_dressing_naive.irp.f | 450 ++++++++++++++++++- src/tc_bi_ortho/no_dressing_v0.irp.f | 277 +++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 89 +++- 4 files changed, 834 insertions(+), 36 deletions(-) diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index d8145c3e..cb5c08cf 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -68,6 +68,60 @@ END_PROVIDER ! --- +subroutine give_integrals_3_body_bi_ort_spin( n, sigma_n, l, sigma_l, k, sigma_k & + , m, sigma_m, j, sigma_j, i, sigma_i & + , integral) + + BEGIN_DOC + ! + ! < n l k | -L | m j i > with a BI-ORTHONORMAL SPIN-ORBITALS + ! + END_DOC + + implicit none + integer, intent(in) :: n, l, k, m, j, i + double precision, intent(in) :: sigma_n, sigma_l, sigma_k, sigma_m, sigma_j, sigma_i + double precision, intent(out) :: integral + integer :: ipoint + double precision :: weight, tmp + logical, external :: is_same_spin + + integral = 0.d0 + + if( is_same_spin(sigma_n, sigma_m) .and. & + is_same_spin(sigma_l, sigma_j) .and. & + is_same_spin(sigma_k, sigma_i) ) then + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_t + + do ipoint = 1, n_points_final_grid + + tmp = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) + + tmp = tmp + mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + + tmp = tmp + mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) + + integral = integral + tmp * final_weight_at_r_vector(ipoint) + enddo + + endif + + return +end subroutine give_integrals_3_body_bi_ort_spin + +! --- + subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f index 10c80cec..a57b1723 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -4,19 +4,26 @@ BEGIN_PROVIDER [double precision, no_0_naive] implicit none - integer :: ii, jj, kk - integer :: i, j, k - double precision :: sigma_i, sigma_j, sigma_k - double precision :: tmp - double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj - double precision :: t0, t1 - logical, external :: is_same_spin + integer :: ii, jj, kk + integer :: i, j, k + double precision :: sigma_i, sigma_j, sigma_k + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, I_ijk_kji, I_ijk_ikj + double precision :: t0, t1 + double precision, allocatable :: tmp(:) print*, " Providing no_0_naive ..." call wall_time(t0) + allocate(tmp(elec_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, kk, k, sigma_k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jki, I_ijk_jik, & + !$OMP I_ijk_kji, I_ijk_ikj) & + !$OMP SHARED (elec_beta_num, elec_num, tmp) + !$OMP DO - tmp = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then @@ -27,6 +34,8 @@ BEGIN_PROVIDER [double precision, no_0_naive] sigma_i = +1.d0 endif + tmp(ii) = 0.d0 + do jj = 1, elec_num if(jj .le. elec_beta_num) then @@ -72,16 +81,20 @@ BEGIN_PROVIDER [double precision, no_0_naive] , I_ijk_ikj) - tmp = tmp + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj - !tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + tmp(ii) = tmp(ii) + I_ijk_ijk + I_ijk_kij + I_ijk_jki - I_ijk_jik - I_ijk_kji - I_ijk_ikj + ! = tmp(ii) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - no_0_naive = -1.d0 * (-tmp) / 6.d0 + no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + + deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (sec) = ", (t1 - t0)/60.d0 + print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 print*, " no_0_naive = ", no_0_naive @@ -89,4 +102,417 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! + END_DOC + + implicit none + integer :: ii, jj + integer :: i, j, p, s + double precision :: sigma_i, sigma_j, sigma_p, sigma_s + double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi + double precision :: t0, t1 + + print*, " Providing no_1_naive ..." + call wall_time(t0) + + ! ---- + ! up-up part + + sigma_p = +1.d0 + sigma_s = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, & + !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & + !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_s, no_1_naive) + + !$OMP DO COLLAPSE (2) + + do s = 1, mo_num + do p = 1, mo_num + + no_1_naive(p,s) = 0.d0 + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1d0 + endif + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, j, sigma_j, i, sigma_i & + , I_pij_sji) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, i, sigma_i, j, sigma_j & + , I_pij_sij) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, i, sigma_i, s, sigma_s & + , I_pij_jis) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, j, sigma_j, s, sigma_s & + , I_pij_ijs) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, s, sigma_s, j, sigma_j & + , I_pij_isj) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, s, sigma_s, i, sigma_i & + , I_pij_jsi) + + ! x (-1) because integrals are over -L + ! x 0.5 because we consider 0.5 (up + down) + no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + enddo ! j + enddo ! i + enddo ! s + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + + ! ---- + ! down-down part + + sigma_p = -1.d0 + sigma_s = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, jj, j, sigma_j, & + !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & + !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_s, no_1_naive) + + !$OMP DO COLLAPSE (2) + + do s = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + do jj = 1, elec_num + if(jj .le. elec_beta_num) then + j = jj + sigma_j = -1.d0 + else + j = jj - elec_beta_num + sigma_j = +1d0 + endif + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, j, sigma_j, i, sigma_i & + , I_pij_sji) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , s, sigma_s, i, sigma_i, j, sigma_j & + , I_pij_sij) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, i, sigma_i, s, sigma_s & + , I_pij_jis) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, j, sigma_j, s, sigma_s & + , I_pij_ijs) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , i, sigma_i, s, sigma_s, j, sigma_j & + , I_pij_isj) + + call give_integrals_3_body_bi_ort_spin( p, sigma_p, i, sigma_i, j, sigma_j & + , j, sigma_j, s, sigma_s, i, sigma_i & + , I_pij_jsi) + + ! x (-1) because integrals are over -L + ! x 0.5 because we consider 0.5 (up + down) + no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + enddo ! j + enddo ! i + enddo ! s + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + call wall_time(t1) + print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! + END_DOC + + implicit none + integer :: ii + integer :: i, p, q, s, t + double precision :: sigma_i, sigma_p, sigma_q, sigma_s, sigma_t + double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi + double precision :: t0, t1 + + print*, " Providing no_2_naive ..." + call wall_time(t0) + + ! ---- + ! up-up & up-up part + + sigma_p = +1.d0 + sigma_s = +1.d0 + sigma_q = +1.d0 + sigma_t = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_naive(p,q,s,t) = 0.d0 + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! up-up & down-down part + + sigma_p = +1.d0 + sigma_s = +1.d0 + sigma_q = -1.d0 + sigma_t = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! down-down & up-up part + + sigma_p = -1.d0 + sigma_s = -1.d0 + sigma_q = +1.d0 + sigma_t = +1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + ! ---- + ! down-down & down-down part + + sigma_p = -1.d0 + sigma_s = -1.d0 + sigma_q = -1.d0 + sigma_t = -1.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ii, i, sigma_i, p, q, s, t, & + !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & + !$OMP SHARED (mo_num, elec_beta_num, elec_num, & + !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & + !$OMP no_2_naive) + + !$OMP DO COLLAPSE (4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + do ii = 1, elec_num + if(ii .le. elec_beta_num) then + i = ii + sigma_i = -1.d0 + else + i = ii - elec_beta_num + sigma_i = +1.d0 + endif + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , i, sigma_i, s, sigma_s, t, sigma_t & + , I_ipq_ist) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , s, sigma_s, i, sigma_i, t, sigma_t & + , I_ipq_sit) + + call give_integrals_3_body_bi_ort_spin( i, sigma_i, p, sigma_p, q, sigma_q & + , t, sigma_t, s, sigma_s, i, sigma_i & + , I_ipq_tsi) + + ! x (-1) because integrals are over -L + ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) + no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + enddo ! i + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(t1) + print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f index 9a070dab..4d40c76f 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -4,19 +4,28 @@ BEGIN_PROVIDER [double precision, no_0_v0] implicit none - integer :: i, j, k - double precision :: tmp - double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji - double precision :: t0, t1 + integer :: i, j, k + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji + double precision :: t0, t1 + double precision, allocatable :: tmp(:) call wall_time(t0) print*, " Providing no_0_v0 ..." if(elec_alpha_num .eq. elec_beta_num) then - tmp = 0.d0 + allocate(tmp(elec_beta_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik) & + !$OMP SHARED (elec_beta_num, tmp) + + !$OMP DO do i = 1, elec_beta_num + + tmp(i) = 0.d0 do j = 1, elec_beta_num do k = 1, elec_beta_num @@ -24,18 +33,32 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-tmp) / 6.d0 + no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + + deallocate(tmp) else - tmp = 0.d0 + allocate(tmp(elec_alpha_num)) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik, & + !$OMP I_ijk_jki, I_ijk_ikj, I_ijk_kji) & + !$OMP SHARED (elec_beta_num, elec_alpha_num, tmp) + + !$OMP DO do i = 1, elec_beta_num + + tmp(i) = 0.d0 do j = 1, elec_beta_num do k = 1, elec_beta_num @@ -43,12 +66,16 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) enddo enddo enddo + !$OMP END DO + !$OMP DO do i = elec_beta_num+1, elec_alpha_num + + tmp(i) = 0.d0 do j = elec_beta_num+1, elec_alpha_num do k = elec_beta_num+1, elec_alpha_num @@ -56,14 +83,11 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - tmp = tmp + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik - enddo - enddo - enddo + tmp(i) = tmp(i) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo ! k + enddo ! j - do i = elec_beta_num+1, elec_alpha_num do j = 1, elec_beta_num - do k = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) @@ -72,8 +96,8 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp = tmp + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) - enddo + tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + enddo ! k do k = elec_beta_num+1, elec_alpha_num @@ -83,18 +107,21 @@ BEGIN_PROVIDER [double precision, no_0_v0] call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp = tmp + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) - enddo + tmp(i) = tmp(i) + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + enddo ! k + enddo ! j + enddo ! i + !$OMP END DO + !$OMP END PARALLEL - enddo - enddo + no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 - no_0_v0 = -1.d0 * (-tmp) / 6.d0 + deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (sec) = ", (t1 - t0)/60.d0 + print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 print*, " no_0_v0 = ", no_0_v0 @@ -102,4 +129,208 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] + + BEGIN_DOC + ! + ! x (-1) because integrals are over -L + ! + END_DOC + + implicit none + integer :: p, s, i, j + double precision :: I_pij_sij, I_pij_isj, I_pij_ijs, I_pij_sji, I_pij_jsi, I_pij_jis + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_1_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji) & + !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + no_1_v0(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + no_1_v0(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + call give_integrals_3_body_bi_ort(p, i, j, j, s, i, I_pij_jsi) + call give_integrals_3_body_bi_ort(p, i, j, j, i, s, I_pij_jis) + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + + no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + enddo ! j + + do j = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! x (-1) because integrals are over -L + ! + END_DOC + + implicit none + integer :: p, q, s, t, i + double precision :: I_ipq_sit, I_ipq_tsi, I_ipq_ist + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing no_2_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + no_2_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 226854ed..9cbf7748 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -38,6 +38,8 @@ program tc_bi_ortho !call test_no_v0() call test_no_0() + call test_no_1() + call test_no_2() end @@ -505,7 +507,7 @@ subroutine test_no_0() implicit none double precision :: accu, norm - print*, ' testing test_no_0 ...' + print*, ' testing no_0 ...' PROVIDE no_0_naive PROVIDE no_0_v0 @@ -520,3 +522,88 @@ end ! --- +subroutine test_no_1() + + implicit none + integer :: i, j + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' testing no_1 ...' + + PROVIDE no_1_naive + PROVIDE no_1_v0 + + thr = 1d-8 + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + + new = no_1_v0 (j,i) + ref = no_1_naive(j,i) + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + +subroutine test_no_2() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' testing no_2 ...' + + PROVIDE no_2_naive + PROVIDE no_2_v0 + + thr = 1d-8 + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = no_2_v0 (l,k,j,i) + ref = no_2_naive(l,k,j,i) + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on no_aaa_contraction' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm + + return +end + +! --- + + From 19b9976d134ceee49dc3afe180d3de8ae0dede96 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 20:43:54 +0200 Subject: [PATCH 281/337] noL dredding --- src/bi_ort_ints/one_e_bi_ort.irp.f | 7 ++- src/bi_ort_ints/total_twoe_pot.irp.f | 6 +++ src/tc_bi_ortho/no_dressing_naive.irp.f | 54 ++++++++++++------------ src/tc_bi_ortho/no_dressing_v0.irp.f | 54 ++++++++++++------------ src/tc_bi_ortho/slater_tc_opt.irp.f | 6 ++- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 10 +++++ src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 24 +++++------ src/tc_keywords/EZFIO.cfg | 6 +++ 8 files changed, 98 insertions(+), 69 deletions(-) diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 49181182..7c2ac860 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -29,7 +29,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] BEGIN_DOC ! @@ -41,6 +41,11 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num) + if(noL_standard) then + PROVIDE noL_1e + mo_bi_ortho_tc_one_e = mo_bi_ortho_tc_one_e + noL_1e + endif + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f03e8a34..49f613b5 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -256,6 +256,12 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, FREE mo_bi_ortho_tc_two_e_chemist + if(noL_standard) then + PROVIDE noL_2e + mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + noL_2e + FREE noL_2e + endif + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/tc_bi_ortho/no_dressing_naive.irp.f index a57b1723..a0c488b3 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/tc_bi_ortho/no_dressing_naive.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_naive] +BEGIN_PROVIDER [double precision, noL_0e_naive] implicit none integer :: ii, jj, kk @@ -11,7 +11,7 @@ BEGIN_PROVIDER [double precision, no_0_naive] double precision :: t0, t1 double precision, allocatable :: tmp(:) - print*, " Providing no_0_naive ..." + print*, " Providing noL_0e_naive ..." call wall_time(t0) allocate(tmp(elec_num)) @@ -89,24 +89,24 @@ BEGIN_PROVIDER [double precision, no_0_naive] !$OMP END DO !$OMP END PARALLEL - no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e_naive = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e_naive (min) = ", (t1 - t0)/60.d0 - print*, " no_0_naive = ", no_0_naive + print*, " noL_0e_naive = ", noL_0e_naive END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] BEGIN_DOC ! - ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! < p | H(1) | s > is dressed with noL_1e_naive(p,s) ! END_DOC @@ -117,7 +117,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi double precision :: t0, t1 - print*, " Providing no_1_naive ..." + print*, " Providing noL_1e_naive ..." call wall_time(t0) ! ---- @@ -132,14 +132,14 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) do s = 1, mo_num do p = 1, mo_num - no_1_naive(p,s) = 0.d0 + noL_1e_naive(p,s) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -184,7 +184,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -205,7 +205,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) @@ -256,7 +256,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -267,17 +267,17 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! --- call wall_time(t1) - print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! - ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! < p q | H(2) | s t > is dressed with noL_2e_naive(p,q,s,t) ! END_DOC @@ -288,7 +288,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi double precision :: t0, t1 - print*, " Providing no_2_naive ..." + print*, " Providing noL_2e_naive ..." call wall_time(t0) ! ---- @@ -305,7 +305,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -313,7 +313,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_naive(p,q,s,t) = 0.d0 + noL_2e_naive(p,q,s,t) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -337,7 +337,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -360,7 +360,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -391,7 +391,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -414,7 +414,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -445,7 +445,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -468,7 +468,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -509,7 +509,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP END PARALLEL call wall_time(t1) - print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/tc_bi_ortho/no_dressing_v0.irp.f index 4d40c76f..efcf51db 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/tc_bi_ortho/no_dressing_v0.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_v0] +BEGIN_PROVIDER [double precision, noL_0e] implicit none integer :: i, j, k @@ -10,7 +10,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] double precision, allocatable :: tmp(:) call wall_time(t0) - print*, " Providing no_0_v0 ..." + print*, " Providing noL_0e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) @@ -114,22 +114,22 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - print*, " no_0_v0 = ", no_0_v0 + print*, " noL_0e = ", noL_0e END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] BEGIN_DOC ! @@ -143,7 +143,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_1_v0 ..." + print*, " Providing noL_1e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -152,13 +152,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -167,7 +167,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -182,13 +182,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -197,7 +197,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -211,7 +211,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -221,7 +221,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -233,13 +233,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -253,7 +253,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_2_v0 ..." + print*, " Providing noL_2e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -261,7 +261,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -269,14 +269,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo enddo enddo @@ -291,7 +291,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -299,14 +299,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i do i = elec_beta_num+1, elec_alpha_num @@ -315,7 +315,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i enddo ! p @@ -328,7 +328,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..f12b83e3 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -34,7 +34,9 @@ end ! --- subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) - implicit none + + implicit none + BEGIN_DOC ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -82,7 +84,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hmono = 0.d0 htwoe = 0.d0 htot = 0.d0 - hthree = 0.D0 + hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index d95c87b1..367d90dd 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -29,6 +29,11 @@ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + ref_tc_energy_tot += noL_0e + endif + END_PROVIDER ! --- @@ -107,6 +112,11 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot = hmono + htwoe + hthree + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + htot += noL_0e + endif + end ! --- diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 9cbf7748..b55419a8 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -509,11 +509,11 @@ subroutine test_no_0() print*, ' testing no_0 ...' - PROVIDE no_0_naive - PROVIDE no_0_v0 + PROVIDE noL_0e_naive + PROVIDE noL_0e - accu = dabs(no_0_naive - no_0_v0) - norm = dabs(no_0_naive) + accu = dabs(noL_0e_naive - noL_0e) + norm = dabs(noL_0e_naive) print*, ' accu (%) = ', 100.d0*accu/norm @@ -530,8 +530,8 @@ subroutine test_no_1() print*, ' testing no_1 ...' - PROVIDE no_1_naive - PROVIDE no_1_v0 + PROVIDE noL_1e_naive + PROVIDE noL_1e thr = 1d-8 @@ -540,8 +540,8 @@ subroutine test_no_1() do i = 1, mo_num do j = 1, mo_num - new = no_1_v0 (j,i) - ref = no_1_naive(j,i) + new = noL_1e (j,i) + ref = noL_1e_naive(j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' @@ -570,8 +570,8 @@ subroutine test_no_2() print*, ' testing no_2 ...' - PROVIDE no_2_naive - PROVIDE no_2_v0 + PROVIDE noL_2e_naive + PROVIDE noL_2e thr = 1d-8 @@ -582,8 +582,8 @@ subroutine test_no_2() do k = 1, mo_num do l = 1, mo_num - new = no_2_v0 (l,k,j,i) - ref = no_2_naive(l,k,j,i) + new = noL_2e (l,k,j,i) + ref = noL_2e_naive(l,k,j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a70ccc63..fee492b4 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: If |true|, contracted double excitation three-body terms are included interface: ezfio,provider,ocaml default: False +[noL_standard] +type: logical +doc: If |true|, standard normal-ordering for L +interface: ezfio,provider,ocaml +default: False + [core_tc_op] type: logical doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) From 677b58ae61e8733ec28068ffdf4167e53950a120 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Tue, 12 Sep 2023 20:43:54 +0200 Subject: [PATCH 282/337] noL dredding --- .../no_dressing_naive.irp.f | 54 +++++++++---------- .../no_dressing_v0.irp.f | 54 +++++++++---------- src/bi_ort_ints/one_e_bi_ort.irp.f | 7 ++- src/bi_ort_ints/total_twoe_pot.irp.f | 6 +++ src/tc_bi_ortho/slater_tc_opt.irp.f | 6 ++- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 10 ++++ src/tc_bi_ortho/tc_utils.irp.f | 22 -------- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 24 ++++----- src/tc_keywords/EZFIO.cfg | 6 +++ src/utils/util.irp.f | 25 +++++++++ 10 files changed, 123 insertions(+), 91 deletions(-) rename src/{tc_bi_ortho => bi_ort_ints}/no_dressing_naive.irp.f (90%) rename src/{tc_bi_ortho => bi_ort_ints}/no_dressing_v0.irp.f (84%) diff --git a/src/tc_bi_ortho/no_dressing_naive.irp.f b/src/bi_ort_ints/no_dressing_naive.irp.f similarity index 90% rename from src/tc_bi_ortho/no_dressing_naive.irp.f rename to src/bi_ort_ints/no_dressing_naive.irp.f index a57b1723..a0c488b3 100644 --- a/src/tc_bi_ortho/no_dressing_naive.irp.f +++ b/src/bi_ort_ints/no_dressing_naive.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_naive] +BEGIN_PROVIDER [double precision, noL_0e_naive] implicit none integer :: ii, jj, kk @@ -11,7 +11,7 @@ BEGIN_PROVIDER [double precision, no_0_naive] double precision :: t0, t1 double precision, allocatable :: tmp(:) - print*, " Providing no_0_naive ..." + print*, " Providing noL_0e_naive ..." call wall_time(t0) allocate(tmp(elec_num)) @@ -89,24 +89,24 @@ BEGIN_PROVIDER [double precision, no_0_naive] !$OMP END DO !$OMP END PARALLEL - no_0_naive = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e_naive = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) call wall_time(t1) - print*, " Wall time for no_0_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e_naive (min) = ", (t1 - t0)/60.d0 - print*, " no_0_naive = ", no_0_naive + print*, " noL_0e_naive = ", noL_0e_naive END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] BEGIN_DOC ! - ! < p | H(1) | s > is dressed with no_1_naive(p,s) + ! < p | H(1) | s > is dressed with noL_1e_naive(p,s) ! END_DOC @@ -117,7 +117,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] double precision :: I_pij_sji, I_pij_sij, I_pij_jis, I_pij_ijs, I_pij_isj, I_pij_jsi double precision :: t0, t1 - print*, " Providing no_1_naive ..." + print*, " Providing noL_1e_naive ..." call wall_time(t0) ! ---- @@ -132,14 +132,14 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) do s = 1, mo_num do p = 1, mo_num - no_1_naive(p,s) = 0.d0 + noL_1e_naive(p,s) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -184,7 +184,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -205,7 +205,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] !$OMP I_pij_sji, I_pij_sij, I_pij_jis, & !$OMP I_pij_ijs, I_pij_isj, I_pij_jsi ) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & - !$OMP sigma_p, sigma_s, no_1_naive) + !$OMP sigma_p, sigma_s, noL_1e_naive) !$OMP DO COLLAPSE (2) @@ -256,7 +256,7 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - no_1_naive(p,s) = no_1_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -267,17 +267,17 @@ BEGIN_PROVIDER [double precision, no_1_naive, (mo_num, mo_num)] ! --- call wall_time(t1) - print*, " Wall time for no_1_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! - ! < p q | H(2) | s t > is dressed with no_2_naive(p,q,s,t) + ! < p q | H(2) | s t > is dressed with noL_2e_naive(p,q,s,t) ! END_DOC @@ -288,7 +288,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] double precision :: I_ipq_ist, I_ipq_sit, I_ipq_tsi double precision :: t0, t1 - print*, " Providing no_2_naive ..." + print*, " Providing noL_2e_naive ..." call wall_time(t0) ! ---- @@ -305,7 +305,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -313,7 +313,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_naive(p,q,s,t) = 0.d0 + noL_2e_naive(p,q,s,t) = 0.d0 do ii = 1, elec_num if(ii .le. elec_beta_num) then i = ii @@ -337,7 +337,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -360,7 +360,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -391,7 +391,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -414,7 +414,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -445,7 +445,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -468,7 +468,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP I_ipq_ist, I_ipq_sit, I_ipq_tsi) & !$OMP SHARED (mo_num, elec_beta_num, elec_num, & !$OMP sigma_p, sigma_q, sigma_s, sigma_t, & - !$OMP no_2_naive) + !$OMP noL_2e_naive) !$OMP DO COLLAPSE (4) do t = 1, mo_num @@ -499,7 +499,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - no_2_naive(p,q,s,t) = no_2_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -509,7 +509,7 @@ BEGIN_PROVIDER [double precision, no_2_naive, (mo_num, mo_num, mo_num, mo_num)] !$OMP END PARALLEL call wall_time(t1) - print*, " Wall time for no_2_naive (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e_naive (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/tc_bi_ortho/no_dressing_v0.irp.f b/src/bi_ort_ints/no_dressing_v0.irp.f similarity index 84% rename from src/tc_bi_ortho/no_dressing_v0.irp.f rename to src/bi_ort_ints/no_dressing_v0.irp.f index 4d40c76f..efcf51db 100644 --- a/src/tc_bi_ortho/no_dressing_v0.irp.f +++ b/src/bi_ort_ints/no_dressing_v0.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, no_0_v0] +BEGIN_PROVIDER [double precision, noL_0e] implicit none integer :: i, j, k @@ -10,7 +10,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] double precision, allocatable :: tmp(:) call wall_time(t0) - print*, " Providing no_0_v0 ..." + print*, " Providing noL_0e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) @@ -114,22 +114,22 @@ BEGIN_PROVIDER [double precision, no_0_v0] !$OMP END DO !$OMP END PARALLEL - no_0_v0 = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for no_0_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - print*, " no_0_v0 = ", no_0_v0 + print*, " noL_0e = ", noL_0e END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] BEGIN_DOC ! @@ -143,7 +143,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_1_v0 ..." + print*, " Providing noL_1e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -152,13 +152,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -167,7 +167,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -182,13 +182,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_1_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - no_1_v0(p,s) = 0.d0 + noL_1e(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -197,7 +197,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -211,7 +211,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - no_1_v0(p,s) = no_1_v0(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -221,7 +221,7 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - no_1_v0(p,s) = no_1_v0(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -233,13 +233,13 @@ BEGIN_PROVIDER [double precision, no_1_v0, (mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_1_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] BEGIN_DOC ! @@ -253,7 +253,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing no_2_v0 ..." + print*, " Providing noL_2e ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -261,7 +261,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -269,14 +269,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo enddo enddo @@ -291,7 +291,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] !$OMP DEFAULT (NONE) & !$OMP PRIVATE (p, q, s, t, i, & !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, no_2_v0) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e) !$OMP DO COLLAPSE(4) do t = 1, mo_num @@ -299,14 +299,14 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] do q = 1, mo_num do p = 1, mo_num - no_2_v0(p,q,s,t) = 0.d0 + noL_2e(p,q,s,t) = 0.d0 do i = 1, elec_beta_num call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i do i = elec_beta_num+1, elec_alpha_num @@ -315,7 +315,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - no_2_v0(p,q,s,t) = no_2_v0(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i enddo ! p @@ -328,7 +328,7 @@ BEGIN_PROVIDER [double precision, no_2_v0, (mo_num, mo_num, mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for no_2_v0 (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 END_PROVIDER diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 49181182..7c2ac860 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -29,7 +29,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] BEGIN_DOC ! @@ -41,6 +41,11 @@ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)] call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num) + if(noL_standard) then + PROVIDE noL_1e + mo_bi_ortho_tc_one_e = mo_bi_ortho_tc_one_e + noL_1e + endif + END_PROVIDER ! --- diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f03e8a34..49f613b5 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -256,6 +256,12 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, FREE mo_bi_ortho_tc_two_e_chemist + if(noL_standard) then + PROVIDE noL_2e + mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + noL_2e + FREE noL_2e + endif + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..f12b83e3 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -34,7 +34,9 @@ end ! --- subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) - implicit none + + implicit none + BEGIN_DOC ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -82,7 +84,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hmono = 0.d0 htwoe = 0.d0 htot = 0.d0 - hthree = 0.D0 + hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) if(degree.gt.2) return diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index d95c87b1..367d90dd 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -29,6 +29,11 @@ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + ref_tc_energy_tot += noL_0e + endif + END_PROVIDER ! --- @@ -107,6 +112,11 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot = hmono + htwoe + hthree + nuclear_repulsion + if(noL_standard) then + PROVIDE noL_0e + htot += noL_0e + endif + end ! --- diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f index ba5ffff9..53fe5884 100644 --- a/src/tc_bi_ortho/tc_utils.irp.f +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -66,25 +66,3 @@ end ! --- - -logical function is_same_spin(sigma_1, sigma_2) - - BEGIN_DOC - ! - ! true if sgn(sigma_1) = sgn(sigma_2) - ! - END_DOC - - implicit none - double precision, intent(in) :: sigma_1, sigma_2 - - if((sigma_1 * sigma_2) .gt. 0.d0) then - is_same_spin = .true. - else - is_same_spin = .false. - endif - -end function is_same_spin - -! --- - diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 9cbf7748..b55419a8 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -509,11 +509,11 @@ subroutine test_no_0() print*, ' testing no_0 ...' - PROVIDE no_0_naive - PROVIDE no_0_v0 + PROVIDE noL_0e_naive + PROVIDE noL_0e - accu = dabs(no_0_naive - no_0_v0) - norm = dabs(no_0_naive) + accu = dabs(noL_0e_naive - noL_0e) + norm = dabs(noL_0e_naive) print*, ' accu (%) = ', 100.d0*accu/norm @@ -530,8 +530,8 @@ subroutine test_no_1() print*, ' testing no_1 ...' - PROVIDE no_1_naive - PROVIDE no_1_v0 + PROVIDE noL_1e_naive + PROVIDE noL_1e thr = 1d-8 @@ -540,8 +540,8 @@ subroutine test_no_1() do i = 1, mo_num do j = 1, mo_num - new = no_1_v0 (j,i) - ref = no_1_naive(j,i) + new = noL_1e (j,i) + ref = noL_1e_naive(j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' @@ -570,8 +570,8 @@ subroutine test_no_2() print*, ' testing no_2 ...' - PROVIDE no_2_naive - PROVIDE no_2_v0 + PROVIDE noL_2e_naive + PROVIDE noL_2e thr = 1d-8 @@ -582,8 +582,8 @@ subroutine test_no_2() do k = 1, mo_num do l = 1, mo_num - new = no_2_v0 (l,k,j,i) - ref = no_2_naive(l,k,j,i) + new = noL_2e (l,k,j,i) + ref = noL_2e_naive(l,k,j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then print*, ' problem on no_aaa_contraction' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index a70ccc63..fee492b4 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: If |true|, contracted double excitation three-body terms are included interface: ezfio,provider,ocaml default: False +[noL_standard] +type: logical +doc: If |true|, standard normal-ordering for L +interface: ezfio,provider,ocaml +default: False + [core_tc_op] type: logical doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index a9f1a438..ebb13781 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -556,3 +556,28 @@ subroutine sub_A_At(A, N) !$OMP END PARALLEL end + +! --- + +logical function is_same_spin(sigma_1, sigma_2) + + BEGIN_DOC + ! + ! true if sgn(sigma_1) = sgn(sigma_2) + ! + END_DOC + + implicit none + double precision, intent(in) :: sigma_1, sigma_2 + + if((sigma_1 * sigma_2) .gt. 0.d0) then + is_same_spin = .true. + else + is_same_spin = .false. + endif + +end function is_same_spin + +! --- + + From 3f95bf40edda376243fa5e423f3e81ef8029bbd9 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 12:58:26 +0200 Subject: [PATCH 283/337] tc-two-rdm broken ... --- src/tc_bi_ortho/tc_natorb.irp.f | 22 +++++++------- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 32 ++++++++++----------- src/tc_bi_ortho/two_rdm_naive.irp.f | 41 +++++++++++++++++++++------ 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 1b5a66f3..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -32,19 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - if(n_core_orb.ne.0)then -! print*,'core orbitals' -! pause - call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - else - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - endif -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 3e556312..044c31e0 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -25,49 +25,47 @@ end subroutine test implicit none - integer :: h1,p1,h2,p2,i,j,istate - double precision :: rdm, integral, accu,ref + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new double precision :: hmono, htwoe, hthree, htot accu = 0.d0 + accu_new = 0.d0 do h1 = 1, mo_num do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + enddo + enddo + accu_new += integral * rdm_new ! if(dabs(rdm).gt.1.d-10)then ! print*,h1,p1,h2,p2 ! print*,rdm,integral,rdm*integral ! endif - accu += integral * rdm enddo enddo enddo enddo accu *= 0.5d0 - print*,'accu = ',accu -! print*,mo_bi_ortho_tc_two_e(2,15,2,1) -! print*,mo_bi_ortho_tc_two_e(15,2,2,1) -! print*,mo_bi_ortho_tc_two_e(2,1,2,15) -! print*,mo_bi_ortho_tc_two_e(2,1,15,2) + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j -! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! if(i.ne.j)then -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo - print*,' ref = ',ref + print*,' ref = ',ref print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 3963d09e..d21d6a87 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,4 +1,5 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] implicit none BEGIN_DOC ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION @@ -14,6 +15,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -21,14 +23,16 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, if(degree.gt.2)cycle if(degree.gt.0)then ! get excitation operators: from psi_det(j) --> psi_det(i) - call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) - do istate = 2, N_states - contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) - enddo + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -40,6 +44,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -48,6 +53,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -69,6 +75,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -78,6 +85,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -92,6 +100,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif @@ -124,12 +133,13 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) end -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] implicit none BEGIN_DOC ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION END_DOC - integer :: p,q,r,s + integer :: p,q,r,s,s1,s2 do r = 1, mo_num do q = 1, mo_num do s = 1, mo_num @@ -139,5 +149,18 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] enddo enddo enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo END_PROVIDER From 8b14a2b7ab3a6138ce8410671f8edd18640979b7 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 12:58:26 +0200 Subject: [PATCH 284/337] Added spin dependent two-rdm. --- src/tc_bi_ortho/tc_natorb.irp.f | 22 +++++++------- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 32 ++++++++++----------- src/tc_bi_ortho/two_rdm_naive.irp.f | 41 +++++++++++++++++++++------ 3 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 1b5a66f3..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -32,19 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - if(n_core_orb.ne.0)then -! print*,'core orbitals' -! pause - call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - else - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) - endif -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 3e556312..044c31e0 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -25,49 +25,47 @@ end subroutine test implicit none - integer :: h1,p1,h2,p2,i,j,istate - double precision :: rdm, integral, accu,ref + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new double precision :: hmono, htwoe, hthree, htot accu = 0.d0 + accu_new = 0.d0 do h1 = 1, mo_num do p1 = 1, mo_num do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) rdm = tc_two_rdm(p1,h1,p2,h2) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + enddo + enddo + accu_new += integral * rdm_new ! if(dabs(rdm).gt.1.d-10)then ! print*,h1,p1,h2,p2 ! print*,rdm,integral,rdm*integral ! endif - accu += integral * rdm enddo enddo enddo enddo accu *= 0.5d0 - print*,'accu = ',accu -! print*,mo_bi_ortho_tc_two_e(2,15,2,1) -! print*,mo_bi_ortho_tc_two_e(15,2,2,1) -! print*,mo_bi_ortho_tc_two_e(2,1,2,15) -! print*,mo_bi_ortho_tc_two_e(2,1,15,2) + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new ref = 0.d0 do i = 1, N_det do j = 1, N_det -! if(i.eq.j)cycle call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) do istate = 1,N_states -! print*,'i,j',i,j -! print*,psi_l_coef_bi_ortho(i,istate) , psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! if(i.ne.j)then -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) , htwoe -! print*,psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * htwoe -! endif ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe enddo enddo enddo - print*,' ref = ',ref + print*,' ref = ',ref print*,'delta= ',ref-accu end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index 3963d09e..d21d6a87 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -1,4 +1,5 @@ -BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] implicit none BEGIN_DOC ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION @@ -14,6 +15,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, other_spin(2) = 1 allocate(occ(N_int*bit_kind_size,2)) tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 do i = 1, N_det ! psi_left do j = 1, N_det ! psi_right @@ -21,14 +23,16 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, if(degree.gt.2)cycle if(degree.gt.0)then ! get excitation operators: from psi_det(j) --> psi_det(i) - call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) - call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) - contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) - do istate = 2, N_states - contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) - enddo + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -40,6 +44,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -48,6 +53,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -69,6 +75,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -78,6 +85,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -92,6 +100,7 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) +! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif @@ -124,12 +133,13 @@ subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) end -BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] implicit none BEGIN_DOC ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION END_DOC - integer :: p,q,r,s + integer :: p,q,r,s,s1,s2 do r = 1, mo_num do q = 1, mo_num do s = 1, mo_num @@ -139,5 +149,18 @@ BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] enddo enddo enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo END_PROVIDER From 6ba3f48acb7e4016ca5adf425d39e646c6e6628c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 18:28:52 +0200 Subject: [PATCH 285/337] added general Slater rules --- src/tc_bi_ortho/h_mat_triple.irp.f | 330 ++++++++++++++++++++++++++ src/tc_bi_ortho/slater_tc_opt.irp.f | 31 ++- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 14 +- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 8 +- src/tc_bi_ortho/two_rdm_naive.irp.f | 12 +- 5 files changed, 373 insertions(+), 22 deletions(-) create mode 100644 src/tc_bi_ortho/h_mat_triple.irp.f diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f new file mode 100644 index 00000000..5e1d32fe --- /dev/null +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -0,0 +1,330 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: occ(N_int*bit_kind_size,2) + integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk + integer :: degree,exc_double(0:2,2,2),exc_single(0:2,2,2) + integer :: degree_alpha,degree_beta + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3, h4, p4, s4 + double precision :: phase_double, phase_single + integer(bit_kind) :: key_j_alpha(N_int,2),key_i_alpha(N_int,2) + integer(bit_kind) :: key_j_beta(N_int,2),key_i_beta(N_int,2) + integer :: other_spin(2) + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call bitstring_to_list_ab(key_i,occ,Ne,N_int) + call get_excitation_degree(key_i,key_j,degree,N_int) + if(degree.ne.3)then + return + endif + other_spin(1) = 2 + other_spin(2) = 1 + do i = 1, N_int + key_j_alpha(i,1) = key_j(i,1) + key_j_alpha(i,2) = 0_bit_kind + key_i_alpha(i,1) = key_i(i,1) + key_i_alpha(i,2) = 0_bit_kind + + key_j_beta(i,2) = key_j(i,2) + key_j_beta(i,1) = 0_bit_kind + key_i_beta(i,2) = key_i(i,2) + key_i_beta(i,1) = 0_bit_kind + enddo + ! check whether it is a triple excitation of the same spin + + call get_excitation_degree(key_i_alpha,key_j_alpha,degree_alpha,N_int) + call get_excitation_degree(key_i_beta,key_j_beta,degree_beta,N_int) + if(degree_alpha==3.or.degree_beta==3)then + return + else + if(degree_alpha == 2.and.degree_beta == 1)then ! double alpha + single beta + call get_double_excitation(key_i_alpha,key_j_alpha,exc_double,phase_double,N_int) + call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) + call get_single_excitation(key_i_beta,key_j_beta,exc_single,phase_single,N_int) + call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + else if(degree_beta == 2 .and. degree_alpha == 1)then ! double beta + single alpha + call get_double_excitation(key_i_beta,key_j_beta,exc_double,phase_double,N_int) + call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) + call get_single_excitation(key_i_alpha,key_j_alpha,exc_single,phase_single,N_int) + call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + else + print*,'PB !!' + print*,'degree_beta, degree_alpha',degree_beta, degree_alpha + print*,'degree',degree + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + hthree *= phase_single * phase_double + endif + htot = hthree + end + diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index 72f55aca..c69632f6 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -85,14 +85,29 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hthree = 0.D0 call get_excitation_degree(key_i, key_j, degree, Nint) - if(degree.gt.2) return - - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + if(.not.pure_three_body_h_tc)then + if(degree.gt.2) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + else + if(degree==3)then + print*,'degree == 3' + endif + if(degree.gt.3) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + else + call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif endif if(degree==0) then diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 48257943..7cb23d77 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,6 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + external H_tc_s2_dagger_u_0_with_pure_three + external H_tc_s2_u_0_with_pure_three allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -250,7 +252,11 @@ end converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -275,7 +281,11 @@ end converged = .False. i_it = 0 do while (.not. converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) + endif i_it += 1 if(i_it .gt. 5) exit enddo diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f index 044c31e0..68b96f37 100644 --- a/src/tc_bi_ortho/test_tc_two_rdm.irp.f +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -35,19 +35,15 @@ subroutine test do h2 = 1, mo_num do p2 = 1, mo_num integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - rdm = tc_two_rdm(p1,h1,p2,h2) + rdm = tc_two_rdm(p2,p1,h2,h1) accu += integral * rdm rdm_new = 0.d0 do s2 = 1, 2 do s1 = 1, 2 - rdm_new += tc_two_rdm_chemist_s1s2(p1,h1,p2,h2,s1,s2) + rdm_new += tc_two_rdm_s1s2(p2,p1,h2,h1,s1,s2) enddo enddo accu_new += integral * rdm_new -! if(dabs(rdm).gt.1.d-10)then -! print*,h1,p1,h2,p2 -! print*,rdm,integral,rdm*integral -! endif enddo enddo enddo diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f index d21d6a87..90163de5 100644 --- a/src/tc_bi_ortho/two_rdm_naive.irp.f +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -32,7 +32,7 @@ enddo if(degree == 2)then call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) else if(degree==1)then ! occupation of the determinant psi_det(j) call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) @@ -44,7 +44,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the electrons of same spin than the excitation s2 = s1 @@ -53,7 +53,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo endif else if(degree == 0)then @@ -75,7 +75,7 @@ h2 = m p2 = m call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo ! run over the couple of alpha-alpha electrons s2 = s1 @@ -85,7 +85,7 @@ p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo s1 = 2 @@ -100,7 +100,7 @@ p2 = m if(h2.le.h1)cycle call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) -! call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) enddo enddo endif From 14edfa839b39d3b43687920498e177c96bc3d41b Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 13 Sep 2023 18:42:50 +0200 Subject: [PATCH 286/337] added full three body --- src/tc_bi_ortho/h_mat_triple.irp.f | 85 +++++++++++++---------------- src/tc_bi_ortho/slater_tc_opt.irp.f | 3 - 2 files changed, 39 insertions(+), 49 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 5e1d32fe..5f332599 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -267,64 +267,57 @@ subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) integer, intent(in) :: Nint double precision, intent(out) :: hmono, htwoe, hthree, htot - integer :: occ(N_int*bit_kind_size,2) - integer :: Ne(2),i,j,ii,jj,ispin,jspin,k,kk - integer :: degree,exc_double(0:2,2,2),exc_single(0:2,2,2) - integer :: degree_alpha,degree_beta - integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3, h4, p4, s4 - double precision :: phase_double, phase_single - integer(bit_kind) :: key_j_alpha(N_int,2),key_i_alpha(N_int,2) - integer(bit_kind) :: key_j_beta(N_int,2),key_i_beta(N_int,2) - integer :: other_spin(2) + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor hmono = 0.d0 htwoe = 0.d0 hthree = 0.d0 htot = 0.d0 - call bitstring_to_list_ab(key_i,occ,Ne,N_int) - call get_excitation_degree(key_i,key_j,degree,N_int) - if(degree.ne.3)then - return - endif - other_spin(1) = 2 - other_spin(2) = 1 - do i = 1, N_int - key_j_alpha(i,1) = key_j(i,1) - key_j_alpha(i,2) = 0_bit_kind - key_i_alpha(i,1) = key_i(i,1) - key_i_alpha(i,2) = 0_bit_kind - - key_j_beta(i,2) = key_j(i,2) - key_j_beta(i,1) = 0_bit_kind - key_i_beta(i,2) = key_i(i,2) - key_i_beta(i,1) = 0_bit_kind - enddo - ! check whether it is a triple excitation of the same spin - - call get_excitation_degree(key_i_alpha,key_j_alpha,degree_alpha,N_int) - call get_excitation_degree(key_i_beta,key_j_beta,degree_beta,N_int) - if(degree_alpha==3.or.degree_beta==3)then - return + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) else - if(degree_alpha == 2.and.degree_beta == 1)then ! double alpha + single beta - call get_double_excitation(key_i_alpha,key_j_alpha,exc_double,phase_double,N_int) - call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) - call get_single_excitation(key_i_beta,key_j_beta,exc_single,phase_single,N_int) - call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) - else if(degree_beta == 2 .and. degree_alpha == 1)then ! double beta + single alpha - call get_double_excitation(key_i_beta,key_j_beta,exc_double,phase_double,N_int) - call decode_exc(exc_double,2,h1,p1,h2,p2,s1,s2) - call get_single_excitation(key_i_alpha,key_j_alpha,exc_single,phase_single,N_int) - call decode_exc(exc_single,1,h3,p3,h4,p4,s3,s4) + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) else print*,'PB !!' - print*,'degree_beta, degree_alpha',degree_beta, degree_alpha - print*,'degree',degree stop endif hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) - hthree *= phase_single * phase_double endif + hthree *= phase htot = hthree end diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index c69632f6..e398d8f2 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -95,9 +95,6 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) endif else - if(degree==3)then - print*,'degree == 3' - endif if(degree.gt.3) return if(degree == 0) then call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) From 73fc6078caf1a818dfdee1b117c6b20df0aee443 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 14 Sep 2023 19:55:41 +0200 Subject: [PATCH 287/337] fixed stupid bug in purely parallel spin triple excitation term --- src/tc_bi_ortho/h_mat_triple.irp.f | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 5f332599..8d5d1ce4 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -283,17 +283,17 @@ subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, if(degree_array(1) == 3)then h1 = holes_array(1,1) h2 = holes_array(2,1) - h3 = holes_array(1,1) + h3 = holes_array(3,1) p1 = particles_array(1,1) p2 = particles_array(2,1) - p3 = particles_array(1,1) + p3 = particles_array(3,1) else h1 = holes_array(1,2) h2 = holes_array(2,2) - h3 = holes_array(1,2) + h3 = holes_array(3,2) p1 = particles_array(1,2) p2 = particles_array(2,2) - p3 = particles_array(1,2) + p3 = particles_array(3,2) endif hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) else From 9b4082c2350a66e5c70296efdd4401d3e8cc49cc Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 15 Sep 2023 01:06:32 +0200 Subject: [PATCH 288/337] added OMP loops in H_tc_triple psi --- src/tc_bi_ortho/h_mat_triple.irp.f | 68 ++++++++++++++++++ src/tc_bi_ortho/slater_tc_opt.irp.f | 3 + src/tc_bi_ortho/tc_h_eigvectors.irp.f | 8 +-- src/tc_bi_ortho/test_s2_tc.irp.f | 100 +++++++++++++------------- 4 files changed, 125 insertions(+), 54 deletions(-) diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f index 8d5d1ce4..4c8c107a 100644 --- a/src/tc_bi_ortho/h_mat_triple.irp.f +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -221,6 +221,40 @@ subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) @@ -253,6 +287,40 @@ subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) enddo end +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + ! --- subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) use bitmasks diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index e398d8f2..ab21d3e8 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho() if(three_e_4_idx_term) then PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif + if(pure_three_body_h_tc)then + provide three_body_ints_bi_ort + endif if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 7cb23d77..a9e22e03 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,8 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - external H_tc_s2_dagger_u_0_with_pure_three - external H_tc_s2_u_0_with_pure_three + external H_tc_s2_dagger_u_0_with_pure_three_omp + external H_tc_s2_u_0_with_pure_three_omp allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -255,7 +255,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit @@ -284,7 +284,7 @@ end if(.not.pure_three_body_h_tc)then call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) else - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp) endif i_it += 1 if(i_it .gt. 5) exit diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index b398507a..7c70b119 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -14,12 +14,14 @@ program test_tc read_wf = .True. touch read_wf - call routine_test_s2 - call routine_test_s2_davidson + call provide_all_three_ints_bi_ortho() + call routine_h_triple_left + call routine_h_triple_right +! call routine_test_s2_davidson end -subroutine routine_test_s2 +subroutine routine_h_triple_right implicit none logical :: do_right integer :: sze ,i, N_st, j @@ -29,67 +31,65 @@ subroutine routine_test_s2 sze = N_det N_st = 1 allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Left ' - do_right = .False. - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) - accu_e = 0.d0 - accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - - print*,'Checking then the right ' - do_right = .True. + print*,'Checking first the Right ' do i = 1, sze u_0(i,1) = psi_r_coef_bi_ortho(i,1) enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 accu_e = 0.d0 accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) enddo print*,'accu_e = ',accu_e print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - end +subroutine routine_h_triple_left + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking the Left ' + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 + accu_e = 0.d0 + accu_s = 0.d0 + do i = 1, sze + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + +end + + subroutine routine_test_s2_davidson implicit none double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) From bb155c0dfd0b12dadf34b321f0aaa6eb25ac3055 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 15 Sep 2023 11:30:10 +0200 Subject: [PATCH 289/337] J_qmckl en & ee --- src/non_h_ints_mu/NEED | 1 + src/non_h_ints_mu/jast_deriv.irp.f | 95 ++++++++++++++++++++++++++ src/non_h_ints_mu/qmckl.irp.f | 102 ++++++++++++++++++++++++++++ src/non_h_ints_mu/tc_integ_an.irp.f | 14 ++-- src/qmckl/README.md | 4 ++ src/qmckl/qmckl.F90 | 1 + 6 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 src/non_h_ints_mu/qmckl.irp.f create mode 100644 src/qmckl/README.md create mode 100644 src/qmckl/qmckl.F90 diff --git a/src/non_h_ints_mu/NEED b/src/non_h_ints_mu/NEED index d09ab4a5..ecde6390 100644 --- a/src/non_h_ints_mu/NEED +++ b/src/non_h_ints_mu/NEED @@ -1,2 +1,3 @@ +qmckl ao_tc_eff_map bi_ortho_mos diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index ee01886c..6b8445b1 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -24,11 +24,15 @@ double precision :: v1b_r1, v1b_r2, u2b_r12 double precision :: grad1_v1b(3), grad1_u2b(3) double precision :: dx, dy, dz + double precision :: time0, time1 double precision, external :: j12_mu, j1b_nucl PROVIDE j1b_type PROVIDE final_grid_points_extra + print*, ' providing grad1_u12_num & grad1_u12_squared_num ...' + call wall_time(time0) + grad1_u12_num = 0.d0 grad1_u12_squared_num = 0.d0 @@ -112,6 +116,94 @@ !$OMP END DO !$OMP END PARALLEL + elseif (j1b_type .eq. 1000) then + + double precision :: f + f = 1.d0 / dble(elec_num - 1) + + double precision, allocatable :: rij(:,:,:) + allocate( rij(3, 2, n_points_extra_final_grid) ) + + use qmckl + integer(qmckl_exit_code) :: rc + + integer*8 :: npoints + npoints = n_points_extra_final_grid + + double precision, allocatable :: gl(:,:,:) + allocate( gl(2,4,n_points_extra_final_grid) ) + + do ipoint = 1, n_points_final_grid ! r1 + + do jpoint = 1, n_points_extra_final_grid ! r2 + rij(1:3, 1, jpoint) = final_grid_points (1:3, ipoint) + rij(1:3, 2, jpoint) = final_grid_points_extra(1:3, jpoint) + enddo + + + rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', npoints, rij, npoints*6_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_coord' + stop -1 + endif + + + ! --- + ! e-e term + + rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_ee_gl' + stop -1 + endif + + do jpoint = 1, n_points_extra_final_grid ! r2 + grad1_u12_num(jpoint,ipoint,1) = gl(1,1,jpoint) + grad1_u12_num(jpoint,ipoint,2) = gl(1,2,jpoint) + grad1_u12_num(jpoint,ipoint,3) = gl(1,3,jpoint) + enddo + + ! --- + ! e-e-n term + +! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) +! if (rc /= QMCKL_SUCCESS) then +! print *, irp_here, 'qmckl error in fact_een_gl' +! stop -1 +! endif +! +! do jpoint = 1, n_points_extra_final_grid ! r2 +! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,jpoint) +! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,jpoint) +! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,jpoint) +! enddo + + ! --- + ! e-n term + + rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_en_gl' + stop -1 + endif + + do jpoint = 1, n_points_extra_final_grid ! r2 + grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,jpoint) + grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,jpoint) + grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,jpoint) + enddo + + do jpoint = 1, n_points_extra_final_grid ! r2 + dx = grad1_u12_num(jpoint,ipoint,1) + dy = grad1_u12_num(jpoint,ipoint,2) + dz = grad1_u12_num(jpoint,ipoint,3) + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + + enddo + + deallocate(gl, rij) + else print *, ' j1b_type = ', j1b_type, 'not implemented yet' @@ -119,6 +211,9 @@ endif + call wall_time(time1) + print*, ' Wall time for grad1_u12_num & grad1_u12_squared_num (min) =', (time1-time0)/60.d0 + END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f new file mode 100644 index 00000000..d83de4dc --- /dev/null +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -0,0 +1,102 @@ +BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] + use qmckl + implicit none + BEGIN_DOC + ! Context for the QMCKL library + END_DOC + integer(qmckl_exit_code) :: rc + + qmckl_ctx_jastrow = qmckl_context_create() + + rc = qmckl_set_nucleus_num(qmckl_ctx_jastrow, nucl_num*1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_nucleus_charge(qmckl_ctx_jastrow, nucl_charge, nucl_num*1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_nucleus_coord(qmckl_ctx_jastrow, 'T', nucl_coord, nucl_num*3_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_electron_num(qmckl_ctx_jastrow, 1_8, 1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + + ! Jastrow parameters + rc = qmckl_set_jastrow_champ_type_nucl_num (qmckl_ctx_jastrow, 2_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_type_nucl_vector (qmckl_ctx_jastrow, (/0_8,1_8,1_8/), 1_8*nucl_num) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_rescale_factor_ee (qmckl_ctx_jastrow, 0.6d0) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_rescale_factor_en (qmckl_ctx_jastrow, (/0.6d0, 0.6d0 /), 2_8 ) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_aord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_bord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_cord_num (qmckl_ctx_jastrow, 0_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + +! double precision :: a_vector(12) = dble(& +! (/ 0.00000000, 0.00000000, -0.71168405, -0.44415699, -0.13865109, 0.07002267 , & +! 0.00000000, 0.00000000, -0.11379992, 0.04542846, 0.01696997, -0.01809299 /) ) + +! double precision :: b_vector(6) = dble(& +! (/ 0.00000000, 0.65603311, 0.14581988, 0.03138163, 0.00153156, -0.00447302 /) ) + +! double precision :: c_vector(46) = & +! (/ 1.06384279d0, -1.44303973d0, -0.92409833d0, 0.11845356d0, -0.02980776d0, & +! 1.07048863d0, 0.06009623d0, -0.01854872d0, -0.00915398d0, 0.01324198d0, & +! -0.00504959d0, -0.01202497d0, -0.00531644d0, 0.15101629d0, -0.00723831d0, & +! -0.00384182d0, -0.00295036d0, -0.00114583d0, 0.00158107d0, -0.00078107d0, & +! -0.00080000d0, -0.14140576d0, -0.00237271d0, -0.03006706d0, 0.01537009d0, & +! -0.02327226d0, 0.16502789d0, -0.01458259d0, -0.09946065d0, 0.00850029d0, & +! -0.02969361d0, -0.01159547d0, 0.00516313d0, 0.00405247d0, -0.02200886d0, & +! 0.03376709d0, 0.01277767d0, -0.01523013d0, -0.00739224d0, -0.00463953d0, & +! 0.00003174d0, -0.01421128d0, 0.00808140d0, 0.00612988d0, -0.00610632d0, & +! 0.01926215d0 /) + +! a_vector = 0.d0 +! b_vector = 0.d0 +! c_vector = 0.d0 + + double precision :: a_vector(12) = dble(& + (/ 0.00000000 , 0.00000000, -0.45105821, -0.23519218, -0.03825391, 0.10072866, & + 0.00000000 , 0.00000000, -0.06930592, -0.02909224, -0.00134650, 0.01477242 /) ) + + double precision :: b_vector(6) = dble(& + (/ 0.00000000, 0.00000000, 0.29217862, -0.00450671, -0.02925982, -0.01381532 /) ) + + double precision :: c_vector(46) + c_vector = 0.d0 + + rc = qmckl_set_jastrow_champ_a_vector(qmckl_ctx_jastrow, a_vector, 12_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + + rc = qmckl_set_jastrow_champ_b_vector(qmckl_ctx_jastrow, b_vector, 6_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + +! rc = qmckl_set_jastrow_champ_c_vector(qmckl_ctx_jastrow, c_vector, 46_8) +! rc = qmckl_check(qmckl_ctx_jastrow, rc) +! if (rc /= QMCKL_SUCCESS) stop -1 + +END_PROVIDER diff --git a/src/non_h_ints_mu/tc_integ_an.irp.f b/src/non_h_ints_mu/tc_integ_an.irp.f index ae7af987..a6459761 100644 --- a/src/non_h_ints_mu/tc_integ_an.irp.f +++ b/src/non_h_ints_mu/tc_integ_an.irp.f @@ -106,8 +106,11 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_f elseif(j1b_type .ge. 100) then - PROVIDE int2_grad1_u12_ao_num - int2_grad1_u12_ao = int2_grad1_u12_ao_num +! PROVIDE int2_grad1_u12_ao_num +! int2_grad1_u12_ao = int2_grad1_u12_ao_num + + PROVIDE int2_grad1_u12_ao_num_1shot + int2_grad1_u12_ao = int2_grad1_u12_ao_num_1shot else @@ -222,8 +225,11 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p elseif(j1b_type .ge. 100) then - PROVIDE int2_grad1_u12_square_ao_num - int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + ! PROVIDE int2_grad1_u12_square_ao_num + ! int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num + + PROVIDE int2_grad1_u12_square_ao_num_1shot + int2_grad1_u12_square_ao = int2_grad1_u12_square_ao_num_1shot else diff --git a/src/qmckl/README.md b/src/qmckl/README.md new file mode 100644 index 00000000..ebc4b089 --- /dev/null +++ b/src/qmckl/README.md @@ -0,0 +1,4 @@ +#QMCkl + +Info related to the QMCkl library. + diff --git a/src/qmckl/qmckl.F90 b/src/qmckl/qmckl.F90 new file mode 100644 index 00000000..94ac962f --- /dev/null +++ b/src/qmckl/qmckl.F90 @@ -0,0 +1 @@ +#include From b26f7e7fe68ce696a28d074d7f37ea70d36f051d Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Fri, 15 Sep 2023 11:37:11 +0200 Subject: [PATCH 290/337] Fixed Conflict --- .github/workflows/configuration.yml | 2 +- RELEASE_NOTES.org | 5 +- config/gfortran_armpl.cfg | 2 +- config/ifort_2021_debug.cfg | 66 + etc/qp.rc | 13 +- external/ezfio | 2 +- external/irpf90 | 2 +- src/ao_two_e_ints/EZFIO.cfg | 6 + src/ao_two_e_ints/cholesky.irp.f | 516 ++++-- src/ao_two_e_ints/two_e_integrals.irp.f | 511 +++++- src/casscf_cipsi/50.casscf.bats | 49 + src/casscf_cipsi/EZFIO.cfg | 75 + src/casscf_cipsi/NEED | 5 + src/casscf_cipsi/README.rst | 5 + src/casscf_cipsi/bavard.irp.f | 6 + src/casscf_cipsi/bielec.irp.f | 155 ++ src/casscf_cipsi/bielec_natorb.irp.f | 369 +++++ src/casscf_cipsi/casscf.irp.f | 110 ++ src/casscf_cipsi/class.irp.f | 12 + src/casscf_cipsi/dav_sx_mat.irp.f | 45 + src/casscf_cipsi/densities.irp.f | 67 + src/casscf_cipsi/densities_peter.irp.f | 150 ++ src/casscf_cipsi/det_manip.irp.f | 125 ++ src/casscf_cipsi/driver_optorb.irp.f | 3 + src/casscf_cipsi/get_energy.irp.f | 51 + src/casscf_cipsi/grad_old.irp.f | 74 + src/casscf_cipsi/gradient.irp.f | 215 +++ src/casscf_cipsi/hessian.irp.f | 539 ++++++ src/casscf_cipsi/hessian_old.irp.f | 310 ++++ src/casscf_cipsi/mcscf_fock.irp.f | 80 + src/casscf_cipsi/natorb.irp.f | 231 +++ src/casscf_cipsi/neworbs.irp.f | 253 +++ src/casscf_cipsi/reorder_orb.irp.f | 70 + src/casscf_cipsi/save_energy.irp.f | 9 + src/casscf_cipsi/superci_dm.irp.f | 207 +++ src/casscf_cipsi/swap_orb.irp.f | 132 ++ src/casscf_cipsi/tot_en.irp.f | 101 ++ src/casscf_tc_bi/det_manip.irp.f | 125 ++ src/casscf_tc_bi/grad_dm.irp.f | 263 +++ src/casscf_tc_bi/grad_old.irp.f | 134 ++ src/casscf_tc_bi/gradient.irp.f | 94 ++ src/casscf_tc_bi/test_tc_casscf.irp.f | 252 +++ src/ccsd/ccsd_space_orb_sub.irp.f | 232 ++- src/ccsd/ccsd_space_orb_sub_chol.irp.f | 1458 +++++++++++++++++ src/ccsd/ccsd_spin_orb_sub.irp.f | 4 +- src/ccsd/ccsd_t_space_orb_abc.irp.f | 2 +- src/ccsd/ccsd_t_space_orb_stoch.irp.f | 21 +- src/cipsi/pt2_stoch_routines.irp.f | 2 +- src/cipsi_tc_bi_ortho/get_d0_good.irp.f | 8 +- src/cipsi_tc_bi_ortho/get_d1_good.irp.f | 36 +- src/cipsi_tc_bi_ortho/get_d2_good.irp.f | 28 +- src/cipsi_tc_bi_ortho/selection.irp.f | 35 +- src/cisd/EZFIO.cfg | 8 + src/cisd/NEED | 1 + src/cisd/lccsd.irp.f | 95 ++ src/cisd/lccsd_prov.irp.f | 50 + .../dav_diag_dressed_ext_rout.irp.f | 2 +- .../dav_double_dress_ext_rout.irp.f | 2 +- .../dav_dressed_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_ext_rout.irp.f | 2 +- src/dav_general_mat/dav_general.irp.f | 2 +- src/davidson/diagonalization_h_dressed.irp.f | 2 +- .../diagonalization_hcsf_dressed.irp.f | 2 +- .../diagonalization_hs2_dressed.irp.f | 2 +- .../diagonalization_nonsym_h_dressed.irp.f | 2 +- src/davidson_keywords/usef.irp.f | 2 + src/determinants/density_matrix.irp.f | 4 +- src/determinants/dipole_moments.irp.f | 6 +- src/determinants/h_apply.irp.f | 2 +- src/determinants/s2.irp.f | 2 +- src/ezfio_files/NEED | 1 + src/ezfio_files/ezfio.irp.f | 9 +- src/fci_tc_bi/selectors.irp.f | 2 + src/hartree_fock/fock_matrix_hf.irp.f | 90 +- src/mo_optimization/EZFIO.cfg | 4 +- ...optimization.irp.f => cipsi_orb_opt.irp.f} | 2 +- src/mo_optimization/first_gradient_opt.irp.f | 2 +- .../state_average_energy.irp.f | 17 +- src/mo_two_e_ints/cholesky.irp.f | 60 +- src/mo_two_e_ints/integrals_3_index.irp.f | 14 +- src/mo_two_e_ints/mo_bi_integrals.irp.f | 113 +- src/tc_bi_ortho/h_mat_triple.irp.f | 391 +++++ src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f | 93 ++ src/tc_bi_ortho/normal_ordered_old.irp.f | 10 +- src/tc_bi_ortho/print_tc_dump.irp.f | 10 +- src/tc_bi_ortho/slater_tc_3e_slow.irp.f | 2 +- src/tc_bi_ortho/slater_tc_opt.irp.f | 31 +- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 19 +- src/tc_bi_ortho/tc_natorb.irp.f | 17 +- src/tc_bi_ortho/tc_prop.irp.f | 8 +- src/tc_bi_ortho/test_normal_order.irp.f | 43 +- src/tc_bi_ortho/test_s2_tc.irp.f | 100 +- src/tc_bi_ortho/test_tc_fock.irp.f | 9 +- src/tc_bi_ortho/test_tc_two_rdm.irp.f | 67 + src/tc_bi_ortho/two_rdm_naive.irp.f | 166 ++ src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/molden_lr_mos.irp.f | 14 +- src/tc_scf/routines_rotates.irp.f | 16 +- src/tools/molden.irp.f | 4 +- src/tools/print_ci_vectors.irp.f | 2 +- src/two_body_rdm/state_av_act_2rdm.irp.f | 10 +- .../state_av_full_orb_2_rdm.irp.f | 320 ++-- src/two_body_rdm/two_e_dm_mo.irp.f | 5 +- src/utils/block_diag_degen_core.irp.f | 244 +++ src/utils/c_functions.f90 | 7 +- src/utils/fast_mkl.c | 5 + src/utils/format_w_error.irp.f | 2 +- src/utils/fortran_mmap.c | 15 +- src/utils/linear_algebra.irp.f | 12 +- src/utils/map_functions.irp.f | 4 + src/utils/memory.irp.f | 49 +- src/utils/mmap.f90 | 22 +- src/utils_cc/mo_integrals_cc.irp.f | 427 ++++- .../rotation_matrix_iterative.irp.f | 4 +- .../trust_region_optimal_lambda.irp.f | 6 +- 115 files changed, 9231 insertions(+), 678 deletions(-) create mode 100644 config/ifort_2021_debug.cfg create mode 100644 src/casscf_cipsi/50.casscf.bats create mode 100644 src/casscf_cipsi/EZFIO.cfg create mode 100644 src/casscf_cipsi/NEED create mode 100644 src/casscf_cipsi/README.rst create mode 100644 src/casscf_cipsi/bavard.irp.f create mode 100644 src/casscf_cipsi/bielec.irp.f create mode 100644 src/casscf_cipsi/bielec_natorb.irp.f create mode 100644 src/casscf_cipsi/casscf.irp.f create mode 100644 src/casscf_cipsi/class.irp.f create mode 100644 src/casscf_cipsi/dav_sx_mat.irp.f create mode 100644 src/casscf_cipsi/densities.irp.f create mode 100644 src/casscf_cipsi/densities_peter.irp.f create mode 100644 src/casscf_cipsi/det_manip.irp.f create mode 100644 src/casscf_cipsi/driver_optorb.irp.f create mode 100644 src/casscf_cipsi/get_energy.irp.f create mode 100644 src/casscf_cipsi/grad_old.irp.f create mode 100644 src/casscf_cipsi/gradient.irp.f create mode 100644 src/casscf_cipsi/hessian.irp.f create mode 100644 src/casscf_cipsi/hessian_old.irp.f create mode 100644 src/casscf_cipsi/mcscf_fock.irp.f create mode 100644 src/casscf_cipsi/natorb.irp.f create mode 100644 src/casscf_cipsi/neworbs.irp.f create mode 100644 src/casscf_cipsi/reorder_orb.irp.f create mode 100644 src/casscf_cipsi/save_energy.irp.f create mode 100644 src/casscf_cipsi/superci_dm.irp.f create mode 100644 src/casscf_cipsi/swap_orb.irp.f create mode 100644 src/casscf_cipsi/tot_en.irp.f create mode 100644 src/casscf_tc_bi/det_manip.irp.f create mode 100644 src/casscf_tc_bi/grad_dm.irp.f create mode 100644 src/casscf_tc_bi/grad_old.irp.f create mode 100644 src/casscf_tc_bi/gradient.irp.f create mode 100644 src/casscf_tc_bi/test_tc_casscf.irp.f create mode 100644 src/ccsd/ccsd_space_orb_sub_chol.irp.f create mode 100644 src/cisd/lccsd.irp.f create mode 100644 src/cisd/lccsd_prov.irp.f rename src/mo_optimization/{optimization.irp.f => cipsi_orb_opt.irp.f} (96%) create mode 100644 src/tc_bi_ortho/h_mat_triple.irp.f create mode 100644 src/tc_bi_ortho/test_tc_two_rdm.irp.f create mode 100644 src/tc_bi_ortho/two_rdm_naive.irp.f create mode 100644 src/utils/block_diag_degen_core.irp.f create mode 100644 src/utils/fast_mkl.c diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index ba37f5dd..178b394e 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -22,7 +22,7 @@ jobs: - uses: actions/checkout@v3 - name: Install dependencies run: | - sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config hdf5 + sudo apt install gfortran gcc liblapack-dev libblas-dev wget python3 make m4 pkg-config libhdf5-dev - name: zlib run: | ./configure -i zlib || echo OK diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 3bd02898..a0e6d104 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -10,7 +10,8 @@ - Added many types of integrals - Accelerated four-index transformation - Added transcorrelated SCF - - Added transcorrelated CIPSI + - Added bi-orthonormal transcorrelated CIPSI + - Added Cholesky decomposition of AO integrals - Added CCSD and CCSD(T) - Added MO localization - Changed coupling parameters for ROHF @@ -20,7 +21,7 @@ - Removed cryptokit dependency in OCaml - Using now standard convention in RDM - Added molecular properties - - [ ] Added GTOs with complex exponent + - Added GTOs with complex exponent *** TODO: take from dev - Updated version of f77-zmq diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg index fb5ee1cc..370e396e 100644 --- a/config/gfortran_armpl.cfg +++ b/config/gfortran_armpl.cfg @@ -14,7 +14,7 @@ # [COMMON] FC : gfortran -g -ffree-line-length-none -I . -fPIC -march=native -LAPACK_LIB : -larmpl_lp64 +LAPACK_LIB : -larmpl_lp64_mp IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg new file mode 100644 index 00000000..d70b1465 --- /dev/null +++ b/config/ifort_2021_debug.cfg @@ -0,0 +1,66 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -msse4.2 -O2 -ip -ftz -g + + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -msse4.2 -O2 -ip -ftz + + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -msse4.2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0 -implicitnone + + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/etc/qp.rc b/etc/qp.rc index 9eec4570..d316faf5 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -188,7 +188,18 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in */ $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) + # Array to store directory names + dirs="" + + # Find directories containing "ezfio/.version" file recursively + for i in $(find . -name ezfio | sed 's/ezfio$/.version/') + do + dir_name=${i%/.version} # Remove the ".version" suffix + dir_name=${dir_name#./} # Remove the leading "./" + dirs+="./$dir_name " + done + + COMPREPLY=( $(compgen -W "$dirs" -- ${cur} ) ) return 0 ;; plugins) diff --git a/external/ezfio b/external/ezfio index 0520b5e2..ed1df9f3 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 0520b5e2cf70e2451c37ce5b7f2f64f6d2e5e956 +Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..33ca5e10 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 9f523fca..9c017813 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -4,6 +4,12 @@ doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[io_ao_cholesky] +type: Disk_access +doc: Read/Write |AO| integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + [ao_integrals_threshold] type: Threshold doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero diff --git a/src/ao_two_e_ints/cholesky.irp.f b/src/ao_two_e_ints/cholesky.irp.f index 77eb6ddc..2977f0f4 100644 --- a/src/ao_two_e_ints/cholesky.irp.f +++ b/src/ao_two_e_ints/cholesky.irp.f @@ -1,121 +1,3 @@ -BEGIN_PROVIDER [ integer, cholesky_ao_num_guess ] - implicit none - BEGIN_DOC - ! Number of Cholesky vectors in AO basis - END_DOC - - cholesky_ao_num_guess = ao_num*ao_num -END_PROVIDER - - BEGIN_PROVIDER [ integer, cholesky_ao_num ] -&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, cholesky_ao_num_guess) ] - use mmap_module - implicit none - BEGIN_DOC - ! Cholesky vectors in AO basis: (ik|a): - ! = (ik|jl) = sum_a (ik|a).(a|jl) - END_DOC - - type(c_ptr) :: ptr - integer :: fd, i,j,k,l,m,rank - double precision, pointer :: ao_integrals(:,:,:,:) - double precision, external :: ao_two_e_integral - - ! Store AO integrals in a memory mapped file - call mmap(trim(ezfio_work_dir)//'ao_integrals', & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, .False., ptr) - call c_f_pointer(ptr, ao_integrals, (/ao_num, ao_num, ao_num, ao_num/)) - - print*, 'Providing the AO integrals (Cholesky)' - call wall_time(wall_1) - call cpu_time(cpu_1) - - ao_integrals = 0.d0 - - double precision :: integral, cpu_1, cpu_2, wall_1, wall_2 - logical, external :: ao_two_e_integral_zero - double precision, external :: get_ao_two_e_integral - - if (read_ao_two_e_integrals) then - PROVIDE ao_two_e_integrals_in_map - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,ao_num - do k=1,ao_num - do i=1,ao_num - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = get_ao_two_e_integral(i,j,k,l, ao_integrals_map) - ao_integrals(i,k,j,l) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - else - - !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l, integral, wall_2) - do m=0,9 - do l=1+m,ao_num,10 - !$OMP DO SCHEDULE(dynamic) - do j=1,l - do k=1,ao_num - do i=1,min(k,j) - if (ao_two_e_integral_zero(i,j,k,l)) cycle - integral = ao_two_e_integral(i,k,j,l) - ao_integrals(i,k,j,l) = integral - ao_integrals(k,i,j,l) = integral - ao_integrals(i,k,l,j) = integral - ao_integrals(k,i,l,j) = integral - ao_integrals(j,l,i,k) = integral - ao_integrals(j,l,k,i) = integral - ao_integrals(l,j,i,k) = integral - ao_integrals(l,j,k,i) = integral - enddo - enddo - enddo - !$OMP END DO NOWAIT - enddo - !$OMP MASTER - call wall_time(wall_2) - print '(I10,'' % in'', 4X, F10.2, '' s.'')', (m+1) * 10, wall_2-wall_1 - !$OMP END MASTER - enddo - !$OMP END PARALLEL - - call wall_time(wall_2) - call cpu_time(cpu_2) - print*, 'AO integrals provided:' - print*, ' cpu time :',cpu_2 - cpu_1, 's' - print*, ' wall time :',wall_2 - wall_1, 's ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )' - - endif - - ! Call Lapack - cholesky_ao_num = cholesky_ao_num_guess - call pivoted_cholesky(ao_integrals, cholesky_ao_num, ao_cholesky_threshold, ao_num*ao_num, cholesky_ao) - print *, 'Rank: ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' - - ! Remove mmap - double precision, external :: getUnitAndOpen - call munmap( & - (/ int(ao_num,8), int(ao_num,8), int(ao_num,8), int(ao_num,8) /), & - 8, fd, ptr) - open(unit=99,file=trim(ezfio_work_dir)//'ao_integrals') - close(99, status='delete') - -END_PROVIDER - BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, ao_num) ] implicit none BEGIN_DOC @@ -131,3 +13,401 @@ BEGIN_PROVIDER [ double precision, cholesky_ao_transp, (cholesky_ao_num, ao_num, enddo END_PROVIDER + + BEGIN_PROVIDER [ integer, cholesky_ao_num ] +&BEGIN_PROVIDER [ double precision, cholesky_ao, (ao_num, ao_num, 1) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in AO basis: (ik|a): + ! = (ik|jl) = sum_a (ik|a).(a|jl) + ! + ! Last dimension of cholesky_ao is cholesky_ao_num + END_DOC + + integer :: rank, ndim + double precision :: tau + double precision, pointer :: L(:,:), L_old(:,:) + + + double precision :: s + double precision, parameter :: dscale = 1.d0 + + double precision, allocatable :: D(:), Delta(:,:), Ltmp_p(:,:), Ltmp_q(:,:) + integer, allocatable :: Lset(:), Dset(:), addr(:,:) + logical, allocatable :: computed(:) + + integer :: i,j,k,m,p,q, qj, dj, p2, q2 + integer :: N, np, nq + + double precision :: Dmax, Dmin, Qmax, f + double precision, external :: get_ao_two_e_integral + logical, external :: ao_two_e_integral_zero + + double precision, external :: ao_two_e_integral + integer :: block_size, iblock, ierr + + double precision :: mem + double precision, external :: memory_of_double, memory_of_int + + integer, external :: getUnitAndOpen + integer :: iunit + + ndim = ao_num*ao_num + deallocate(cholesky_ao) + + if (read_ao_cholesky) then + print *, 'Reading Cholesky vectors from disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'R') + read(iunit) rank + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + read(iunit) cholesky_ao + close(iunit) + cholesky_ao_num = rank + + else + + PROVIDE nucl_coord + + if (do_direct_integrals) then + if (ao_two_e_integral(1,1,1,1) < huge(1.d0)) then + ! Trigger providers inside ao_two_e_integral + continue + endif + else + PROVIDE ao_two_e_integrals_in_map + endif + + tau = ao_cholesky_threshold + + mem = 6.d0 * memory_of_double(ndim) + 6.d0 * memory_of_int(ndim) + call check_mem(mem, irp_here) + + call print_memory_usage() + + allocate(L(ndim,1)) + + print *, '' + print *, 'Cholesky decomposition of AO integrals' + print *, '======================================' + print *, '' + print *, '============ =============' + print *, ' Rank Threshold' + print *, '============ =============' + + + rank = 0 + + allocate( D(ndim), Lset(ndim), Dset(ndim) ) + allocate( addr(3,ndim) ) + + ! 1. + k=0 + do j=1,ao_num + do i=1,ao_num + k = k+1 + addr(1,k) = i + addr(2,k) = j + addr(3,k) = (i-1)*ao_num + j + enddo + enddo + + if (do_direct_integrals) then + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = ao_two_e_integral(addr(1,i), addr(2,i), & + addr(1,i), addr(2,i)) + enddo + !$OMP END PARALLEL DO + else + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) SCHEDULE(guided) + do i=1,ndim + D(i) = get_ao_two_e_integral(addr(1,i), addr(1,i), & + addr(2,i), addr(2,i), & + ao_integrals_map) + enddo + !$OMP END PARALLEL DO + endif + + Dmax = maxval(D) + + ! 2. + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo + + ! 3. + N = 0 + + ! 4. + i = 0 + + ! 5. + do while ( (Dmax > tau).and.(rank < ndim) ) + ! a. + i = i+1 + + s = 0.01d0 + + ! Inrease s until the arrays fit in memory + do while (.True.) + + ! b. + Dmin = max(s*Dmax,tau) + + ! c. + nq=0 + do p=1,np + if ( D(Lset(p)) > Dmin ) then + nq = nq+1 + Dset(nq) = Lset(p) + endif + enddo + + call total_memory(mem) + mem = mem & + + np*memory_of_double(nq) &! Delta(np,nq) + + (rank+nq)* memory_of_double(ndim) &! L(ndim,rank+nq) + + (np+nq)*memory_of_double(block_size) ! Ltmp_p(np,block_size) + Ltmp_q(nq,block_size) + + if (mem > qp_max_mem) then + s = s*2.d0 + else + exit + endif + + if ((s > 1.d0).or.(nq == 0)) then + call print_memory_usage() + print *, 'Not enough memory. Reduce cholesky threshold' + stop -1 + endif + + enddo + + ! d., e. + block_size = max(N,24) + + L_old => L + allocate(L(ndim,rank+nq), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (L(ndim,rank+nq))' + stop -1 + endif + + !$OMP PARALLEL DO PRIVATE(k,j) + do k=1,rank + do j=1,ndim + L(j,k) = L_old(j,k) + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(L_old) + + allocate(Delta(np,nq), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Delta(np,nq))' + stop -1 + endif + + allocate(Ltmp_p(np,block_size), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Ltmp_p(np,block_size))' + stop -1 + endif + + allocate(Ltmp_q(nq,block_size), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': allocation failed : (Ltmp_q(nq,block_size))' + stop -1 + endif + + + allocate(computed(nq)) + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(m,k,p,q,j) + + !$OMP DO + do q=1,nq + do j=1,np + Delta(j,q) = 0.d0 + enddo + computed(q) = .False. + enddo + !$OMP ENDDO NOWAIT + + !$OMP DO + do k=1,N + do p=1,np + Ltmp_p(p,k) = L(Lset(p),k) + enddo + do q=1,nq + Ltmp_q(q,k) = L(Dset(q),k) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP BARRIER + !$OMP END PARALLEL + + if (N>0) then + call dgemm('N','T', np, nq, N, -1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + endif + + ! f. + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + ! g. + + iblock = 0 + do j=1,nq + + if ( (Qmax <= Dmin).or.(N+j > ndim) ) exit + ! i. + rank = N+j + + if (iblock == block_size) then + call dgemm('N','T',np,nq,block_size,-1.d0, & + Ltmp_p, np, Ltmp_q, nq, 1.d0, Delta, np) + iblock = 0 + endif + + ! ii. + do dj=1,nq + qj = Dset(dj) + if (D(qj) == Qmax) then + exit + endif + enddo + + L(1:ndim, rank) = 0.d0 + + if (.not.computed(dj)) then + m = dj + !$OMP PARALLEL DO PRIVATE(k) SCHEDULE(guided) + do k=np,1,-1 + if (.not.ao_two_e_integral_zero( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)) ) ) then + if (do_direct_integrals) then + Delta(k,m) = Delta(k,m) + & + ao_two_e_integral(addr(1,Lset(k)), addr(2,Lset(k)),& + addr(1,Dset(m)), addr(2,Dset(m))) + else + Delta(k,m) = Delta(k,m) + & + get_ao_two_e_integral( addr(1,Lset(k)), addr(1,Dset(m)),& + addr(2,Lset(k)), addr(2,Dset(m)), ao_integrals_map) + endif + endif + enddo + !$OMP END PARALLEL DO + computed(dj) = .True. + endif + + iblock = iblock+1 + do p=1,np + Ltmp_p(p,iblock) = Delta(p,dj) + enddo + + ! iv. + if (iblock > 1) then + call dgemv('N', np, iblock-1, -1.d0, Ltmp_p, np, Ltmp_q(dj,1), nq, 1.d0,& + Ltmp_p(1,iblock), 1) + endif + + ! iii. + f = 1.d0/dsqrt(Qmax) + + !$OMP PARALLEL PRIVATE(m,p,q,k) DEFAULT(shared) + !$OMP DO + do p=1,np + Ltmp_p(p,iblock) = Ltmp_p(p,iblock) * f + L(Lset(p), rank) = Ltmp_p(p,iblock) + D(Lset(p)) = D(Lset(p)) - Ltmp_p(p,iblock) * Ltmp_p(p,iblock) + enddo + !$OMP END DO + + !$OMP DO + do q=1,nq + Ltmp_q(q,iblock) = L(Dset(q), rank) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + Qmax = D(Dset(1)) + do q=1,nq + Qmax = max(Qmax, D(Dset(q))) + enddo + + enddo + + print '(I10, 4X, ES12.3)', rank, Qmax + + deallocate(computed) + deallocate(Delta) + deallocate(Ltmp_p) + deallocate(Ltmp_q) + + ! i. + N = rank + + ! j. + Dmax = D(Lset(1)) + do p=1,np + Dmax = max(Dmax, D(Lset(p))) + enddo + + np=0 + do p=1,ndim + if ( dscale*dscale*Dmax*D(p) > tau*tau ) then + np = np+1 + Lset(np) = p + endif + enddo + + enddo + + allocate(cholesky_ao(ao_num,ao_num,rank), stat=ierr) + if (ierr /= 0) then + call print_memory_usage() + print *, irp_here, ': Allocation failed' + stop -1 + endif + !$OMP PARALLEL DO PRIVATE(k) + do k=1,rank + call dcopy(ndim, L(1,k), 1, cholesky_ao(1,1,k), 1) + enddo + !$OMP END PARALLEL DO + deallocate(L) + cholesky_ao_num = rank + + print *, '============ =============' + print *, '' + + if (write_ao_cholesky) then + print *, 'Writing Cholesky vectors to disk...' + iunit = getUnitAndOpen(trim(ezfio_work_dir)//'cholesky_ao', 'W') + write(iunit) rank + write(iunit) cholesky_ao + close(iunit) + call ezfio_set_ao_two_e_ints_io_ao_cholesky('Read') + endif + + endif + + print *, 'Rank : ', cholesky_ao_num, '(', 100.d0*dble(cholesky_ao_num)/dble(ao_num*ao_num), ' %)' + print *, '' + +END_PROVIDER + diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 85ff5bcf..148ebb62 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -460,7 +460,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) !$OMP PARALLEL DO PRIVATE(i,k) & !$OMP DEFAULT(NONE) & !$OMP SHARED (ao_num,ao_two_e_integral_schwartz) & - !$OMP SCHEDULE(dynamic) + !$OMP SCHEDULE(guided) do i=1,ao_num do k=1,i ao_two_e_integral_schwartz(i,k) = dsqrt(ao_two_e_integral(i,i,k,k)) @@ -951,7 +951,7 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib ASSERT (a>2) !DIR$ LOOP COUNT(8) @@ -974,8 +974,43 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if (nx >= 0) then + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -996,9 +1031,47 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt X(ix) *= c enddo endif + ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + endif ny=0 @@ -1016,8 +1089,45 @@ recursive subroutine I_x1_pol_mult_recurs(a,c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt endif ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) @@ -1034,7 +1144,7 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib if( (c<0).or.(nd<0) )then nd = -1 @@ -1056,8 +1166,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 @@ -1068,8 +1214,44 @@ recursive subroutine I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end @@ -1087,7 +1269,7 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) double precision :: X(0:max_dim) double precision :: Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y - integer :: nx, ix,iy,ny + integer :: nx, ix,iy,ny,ib !DIR$ LOOP COUNT(8) do ix=0,n_pt_in @@ -1097,8 +1279,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,X,nx,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_10,2,d,nd) - call multiply_poly_c2(X,nx,B_10,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_10,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(2) * X(0) + + case (1) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(2) * X(1) + + case (2) + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + d(2) = d(2) + B_10(0) * X(2) + B_10(1) * X(1) + B_10(2) * X(0) + d(3) = d(3) + B_10(1) * X(2) + B_10(2) * X(1) + d(4) = d(4) + B_10(2) * X(2) + + case default + d(0) = d(0) + B_10(0) * X(0) + d(1) = d(1) + B_10(0) * X(1) + B_10(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_10(0) * X(ib) + B_10(1) * X(ib-1) + B_10(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_10(1) * X(nx) + B_10(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_10(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif nx = nd !DIR$ LOOP COUNT(8) @@ -1117,8 +1335,44 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) endif ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_00,2,d,nd) - call multiply_poly_c2(X,nx,B_00,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_00,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(2) * X(0) + + case (1) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(2) * X(1) + + case (2) + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + d(2) = d(2) + B_00(0) * X(2) + B_00(1) * X(1) + B_00(2) * X(0) + d(3) = d(3) + B_00(1) * X(2) + B_00(2) * X(1) + d(4) = d(4) + B_00(2) * X(2) + + case default + d(0) = d(0) + B_00(0) * X(0) + d(1) = d(1) + B_00(0) * X(1) + B_00(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_00(0) * X(ib) + B_00(1) * X(ib-1) + B_00(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_00(1) * X(nx) + B_00(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_00(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny=0 !DIR$ LOOP COUNT(8) @@ -1129,8 +1383,45 @@ recursive subroutine I_x1_pol_mult_a2(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) call I_x1_pol_mult_a1(c,B_10,B_01,B_00,C_00,D_00,Y,ny,n_pt_in) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,C_00,2,d,nd) - call multiply_poly_c2(Y,ny,C_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,C_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(2) * Y(0) + + case (1) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(2) * Y(1) + + case (2) + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + d(2) = d(2) + C_00(0) * Y(2) + C_00(1) * Y(1) + C_00(2) * Y(0) + d(3) = d(3) + C_00(1) * Y(2) + C_00(2) * Y(1) + d(4) = d(4) + C_00(2) * Y(2) + + case default + d(0) = d(0) + C_00(0) * Y(0) + d(1) = d(1) + C_00(0) * Y(1) + C_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + C_00(0) * Y(ib) + C_00(1) * Y(ib-1) + C_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + C_00(1) * Y(ny) + C_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + C_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + end recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) @@ -1147,7 +1438,7 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) integer :: nx, ix,ny double precision :: X(0:max_dim),Y(0:max_dim) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y - integer :: i + integer :: i, ib select case (c) case (0) @@ -1178,8 +1469,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) Y(2) = D_00(2) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif + return @@ -1198,8 +1526,44 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) enddo ! !DIR$ FORCEINLINE -! call multiply_poly(X,nx,B_01,2,d,nd) - call multiply_poly_c2(X,nx,B_01,d,nd) +! call multiply_poly_c2_inline_2e(X,nx,B_01,d,nd) + if(nx >= 0) then + + select case (nx) + case (0) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(2) * X(0) + + case (1) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(2) * X(1) + + case (2) + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + d(2) = d(2) + B_01(0) * X(2) + B_01(1) * X(1) + B_01(2) * X(0) + d(3) = d(3) + B_01(1) * X(2) + B_01(2) * X(1) + d(4) = d(4) + B_01(2) * X(2) + + case default + d(0) = d(0) + B_01(0) * X(0) + d(1) = d(1) + B_01(0) * X(1) + B_01(1) * X(0) + do ib=2,nx + d(ib) = d(ib) + B_01(0) * X(ib) + B_01(1) * X(ib-1) + B_01(2) * X(ib-2) + enddo + d(nx+1) = d(nx+1) + B_01(1) * X(nx) + B_01(2) * X(nx-1) + d(nx+2) = d(nx+2) + B_01(2) * X(nx) + + end select + + do nd = nx+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif ny = 0 !DIR$ LOOP COUNT(6) @@ -1209,8 +1573,45 @@ recursive subroutine I_x2_pol_mult(c,B_10,B_01,B_00,C_00,D_00,d,nd,dim) call I_x2_pol_mult(c-1,B_10,B_01,B_00,C_00,D_00,Y,ny,dim) ! !DIR$ FORCEINLINE -! call multiply_poly(Y,ny,D_00,2,d,nd) - call multiply_poly_c2(Y,ny,D_00,d,nd) +! call multiply_poly_c2_inline_2e(Y,ny,D_00,d,nd) + + if(ny >= 0) then + + select case (ny) + case (0) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(2) * Y(0) + + case (1) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(2) * Y(1) + + case (2) + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + d(2) = d(2) + D_00(0) * Y(2) + D_00(1) * Y(1) + D_00(2) * Y(0) + d(3) = d(3) + D_00(1) * Y(2) + D_00(2) * Y(1) + d(4) = d(4) + D_00(2) * Y(2) + + case default + d(0) = d(0) + D_00(0) * Y(0) + d(1) = d(1) + D_00(0) * Y(1) + D_00(1) * Y(0) + do ib=2,ny + d(ib) = d(ib) + D_00(0) * Y(ib) + D_00(1) * Y(ib-1) + D_00(2) * Y(ib-2) + enddo + d(ny+1) = d(ny+1) + D_00(1) * Y(ny) + D_00(2) * Y(ny-1) + d(ny+2) = d(ny+2) + D_00(2) * Y(ny) + + end select + + do nd = ny+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + + endif end select end @@ -1232,7 +1633,8 @@ subroutine compute_ao_integrals_jl(j,l,n_integrals,buffer_i,buffer_value) logical, external :: ao_two_e_integral_zero integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + double precision, external :: ao_two_e_integral + double precision :: cpu_1,cpu_2, wall_1, wall_2 double precision :: integral, wall_0 double precision :: thr integer :: kk, m, j1, i1 @@ -1299,3 +1701,56 @@ subroutine multiply_poly_local(b,nb,c,nc,d,nd) end +!DIR$ FORCEINLINE +subroutine multiply_poly_c2_inline_2e(b,nb,c,d,nd) + implicit none + BEGIN_DOC + ! Multiply two polynomials + ! D(t) += B(t)*C(t) + END_DOC + + integer, intent(in) :: nb + integer, intent(out) :: nd + double precision, intent(in) :: b(0:nb), c(0:2) + double precision, intent(inout) :: d(0:nb+2) + + integer :: ndtmp + integer :: ib, ic, id, k + if(nb < 0) return !False if nb>=0 + + select case (nb) + case (0) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(1) * b(0) + d(2) = d(2) + c(2) * b(0) + + case (1) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(2) * b(1) + + case (2) + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + d(2) = d(2) + c(0) * b(2) + c(1) * b(1) + c(2) * b(0) + d(3) = d(3) + c(1) * b(2) + c(2) * b(1) + d(4) = d(4) + c(2) * b(2) + + case default + d(0) = d(0) + c(0) * b(0) + d(1) = d(1) + c(0) * b(1) + c(1) * b(0) + do ib=2,nb + d(ib) = d(ib) + c(0) * b(ib) + c(1) * b(ib-1) + c(2) * b(ib-2) + enddo + d(nb+1) = d(nb+1) + c(1) * b(nb) + c(2) * b(nb-1) + d(nb+2) = d(nb+2) + c(2) * b(nb) + + end select + + do nd = nb+2,0,-1 + if (d(nd) /= 0.d0) exit + enddo + +end + diff --git a/src/casscf_cipsi/50.casscf.bats b/src/casscf_cipsi/50.casscf.bats new file mode 100644 index 00000000..a0db725d --- /dev/null +++ b/src/casscf_cipsi/50.casscf.bats @@ -0,0 +1,49 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run_stoch() { + thresh=$2 + test_exe casscf || skip + qp set perturbation do_pt2 True + qp set determinants n_det_max $3 + qp set davidson threshold_davidson 1.e-10 + qp set davidson n_states_diag 4 + qp run casscf | tee casscf.out + energy1="$(ezfio get casscf energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" + eq $energy1 $1 $thresh +} + +@test "F2" { # 18.0198s + rm -rf f2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/f2.zmt -o f2_casscf + qp set_file f2_casscf + qp run scf + qp set_mo_class --core="[1-6,8-9]" --act="[7,10]" --virt="[11-46]" + run_stoch -198.773366970 1.e-4 100000 +} + +@test "N2" { # 18.0198s + rm -rf n2_casscf + qp_create_ezfio -b aug-cc-pvdz ../input/n2.xyz -o n2_casscf + qp set_file n2_casscf + qp run scf + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + run_stoch -109.0961643162 1.e-4 100000 +} + +@test "N2_stretched" { + rm -rf n2_stretched_casscf + qp_create_ezfio -b aug-cc-pvdz -m 7 ../input/n2_stretched.xyz -o n2_stretched_casscf + qp set_file n2_stretched_casscf + qp run scf | tee scf.out + qp set_mo_class --core="[1-4]" --act="[5-10]" --virt="[11-46]" + qp set electrons elec_alpha_num 7 + qp set electrons elec_beta_num 7 + run_stoch -108.7860471300 1.e-4 100000 +# + +} + diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg new file mode 100644 index 00000000..2a1f1926 --- /dev/null +++ b/src/casscf_cipsi/EZFIO.cfg @@ -0,0 +1,75 @@ +[energy] +type: double precision +doc: Calculated Selected |FCI| energy +interface: ezfio +size: (determinants.n_states) + +[energy_pt2] +type: double precision +doc: Calculated |FCI| energy + |PT2| +interface: ezfio +size: (determinants.n_states) + +[state_following_casscf] +type: logical +doc: If |true|, the CASSCF will try to follow the guess CI vector and orbitals +interface: ezfio,provider,ocaml +default: False + + +[diag_hess_cas] +type: logical +doc: If |true|, only the DIAGONAL part of the hessian is retained for the CASSCF +interface: ezfio,provider,ocaml +default: False + +[hess_cv_cv] +type: logical +doc: If |true|, the core-virtual - core-virtual part of the hessian is computed +interface: ezfio,provider,ocaml +default: True + + +[level_shift_casscf] +type: Positive_float +doc: Energy shift on the virtual MOs to improve SCF convergence +interface: ezfio,provider,ocaml +default: 0.005 + + +[fast_2rdm] +type: logical +doc: If true, the two-rdm are computed with a fast algo +interface: ezfio,provider,ocaml +default: True + +[criterion_casscf] +type: character*(32) +doc: choice of the criterion for the convergence of the casscf: can be energy or gradients or e_pt2 +interface: ezfio, provider, ocaml +default: e_pt2 + +[thresh_casscf] +type: Threshold +doc: Threshold on the convergence of the CASCF energy. +interface: ezfio,provider,ocaml +default: 1.e-06 + + +[pt2_min_casscf] +type: Threshold +doc: Minimum value of the pt2_max parameter for the CIPSI in the CASSCF iterations. +interface: ezfio,provider,ocaml +default: 1.e-04 + +[n_big_act_orb] +type: integer +doc: Number of active orbitals from which the active space is considered as large, and therefore pt2_min_casscf is activated. +interface: ezfio,provider,ocaml +default: 16 + +[adaptive_pt2_max] +type: logical +doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder +interface: ezfio,provider,ocaml +default: True diff --git a/src/casscf_cipsi/NEED b/src/casscf_cipsi/NEED new file mode 100644 index 00000000..dd91c7bd --- /dev/null +++ b/src/casscf_cipsi/NEED @@ -0,0 +1,5 @@ +cipsi +selectors_full +generators_cas +two_body_rdm +dav_general_mat diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst new file mode 100644 index 00000000..08bfd95b --- /dev/null +++ b/src/casscf_cipsi/README.rst @@ -0,0 +1,5 @@ +====== +casscf +====== + +|CASSCF| program with the CIPSI algorithm. diff --git a/src/casscf_cipsi/bavard.irp.f b/src/casscf_cipsi/bavard.irp.f new file mode 100644 index 00000000..463c3ea4 --- /dev/null +++ b/src/casscf_cipsi/bavard.irp.f @@ -0,0 +1,6 @@ +! -*- F90 -*- +BEGIN_PROVIDER [logical, bavard] +! bavard=.true. + bavard=.false. +END_PROVIDER + diff --git a/src/casscf_cipsi/bielec.irp.f b/src/casscf_cipsi/bielec.irp.f new file mode 100644 index 00000000..0a44f994 --- /dev/null +++ b/src/casscf_cipsi/bielec.irp.f @@ -0,0 +1,155 @@ +BEGIN_PROVIDER [real*8, bielec_PQxx, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! bielec_PQxx : integral (pq|xx) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + real*8 :: mo_two_e_integral + + bielec_PQxx(:,:,:,:) = 0.d0 + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PQxx, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j),mo_integrals_map) + bielec_PQxx(:,:,j,i)=bielec_PQxx(:,:,i,j) + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i)=bielec_PQxx(:,:,i,j3) + end do + end do + !$OMP END DO + + + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_i1j1(ii,jj,mo_num,bielec_PQxx(1,1,i3,j3),mo_integrals_map) + bielec_PQxx(:,:,j3,i3)=bielec_PQxx(:,:,i3,j3) + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! bielec_PxxQ : integral (px|xq) with p,q arbitrary, x core or active + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,ii,jj,p,q,i3,j3,t3,v3 + double precision, allocatable :: integrals_array(:,:) + real*8 :: mo_two_e_integral + + PROVIDE mo_two_e_integrals_in_map + bielec_PxxQ = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(i,ii,j,jj,i3,j3,integrals_array) & + !$OMP SHARED(n_core_inact_orb,list_core_inact,mo_num,bielec_PxxQ, & + !$OMP n_act_orb,mo_integrals_map,list_act) + + allocate(integrals_array(mo_num,mo_num)) + + !$OMP DO + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do j=i,n_core_inact_orb + jj=list_core_inact(j) + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j,q)=integrals_array(p,q) + bielec_PxxQ(p,j,i,q)=integrals_array(q,p) + end do + end do + end do + do j=1,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + + ! (ip|qj) + !$OMP DO + do i=1,n_act_orb + ii=list_act(i) + i3=i+n_core_inact_orb + do j=i,n_act_orb + jj=list_act(j) + j3=j+n_core_inact_orb + call get_mo_two_e_integrals_ij(ii,jj,mo_num,integrals_array,mo_integrals_map) + do q=1,mo_num + do p=1,mo_num + bielec_PxxQ(p,i3,j3,q)=integrals_array(p,q) + bielec_PxxQ(p,j3,i3,q)=integrals_array(q,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(integrals_array) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! bielecCI : integrals (tu|vp) with p arbitrary, tuv active + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,p,t,u,v + double precision, external :: mo_two_e_integral + PROVIDE mo_two_e_integrals_in_map + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i,j,k,p,t,u,v) & + !$OMP SHARED(mo_num,n_act_orb,list_act,bielecCI) + do p=1,mo_num + do j=1,n_act_orb + u=list_act(j) + do k=1,n_act_orb + v=list_act(k) + do i=1,n_act_orb + t=list_act(i) + bielecCI(i,k,j,p) = mo_two_e_integral(t,u,v,p) + end do + end do + end do + end do + !$OMP END PARALLEL DO + +END_PROVIDER diff --git a/src/casscf_cipsi/bielec_natorb.irp.f b/src/casscf_cipsi/bielec_natorb.irp.f new file mode 100644 index 00000000..9968530c --- /dev/null +++ b/src/casscf_cipsi/bielec_natorb.irp.f @@ -0,0 +1,369 @@ + BEGIN_PROVIDER [real*8, bielec_PQxx_no, (mo_num, mo_num,n_core_inact_act_orb,n_core_inact_act_orb)] + BEGIN_DOC + ! integral (pq|xx) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PQxx_no,bielec_PQxx,list_act,natorbsCI) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + bielec_PQxx_no(:,:,:,l) = bielec_PQxx(:,:,:,l) + + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(list_act(p),j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PQxx_no(list_act(p),j,k,l)=d(p,j,k) + end do + end do + + do j=1,mo_num + do p=1,n_act_orb + f(p,j,k)=bielec_PQxx_no(j,list_act(p),k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + do j=1,mo_num + bielec_PQxx_no(j,list_act(p),k,l)=d(p,j,k) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(mo_num,mo_num,n_act_orb),d(mo_num,mo_num,n_act_orb)) + + !$OMP DO + do l=1,n_core_inact_act_orb + + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,n_core_inact_orb+p,l) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,n_core_inact_orb+p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + f(j,k,p) = bielec_PQxx_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*mo_num,n_act_orb,n_act_orb,1.d0, & + f, mo_num*mo_num, & + natorbsCI, n_act_orb, & + 0.d0, & + d, mo_num*mo_num) + do p=1,n_act_orb + do k=1,mo_num + do j=1,mo_num + bielec_PQxx_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate (f,d) + !$OMP END PARALLEL + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, bielec_PxxQ_no, (mo_num,n_core_inact_act_orb,n_core_inact_act_orb, mo_num)] + BEGIN_DOC + ! integral (px|xq) in the basis of natural MOs + ! indices are unshifted orbital numbers + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielec_PxxQ_no,bielec_PxxQ,list_act,natorbsCI) + + + allocate (f(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb), & + d(n_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)) + + !$OMP DO + do j=1,mo_num + bielec_PxxQ_no(:,:,:,j) = bielec_PxxQ(:,:,:,j) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + f(p,k,l) = bielec_PxxQ_no(list_act(p),k,l,j) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_core_inact_act_orb**2,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do k=1,n_core_inact_act_orb + do p=1,n_act_orb + bielec_PxxQ_no(list_act(p),k,l,j)=d(p,k,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate (f,d) + + allocate (f(n_act_orb,mo_num,n_core_inact_act_orb), & + d(n_act_orb,mo_num,n_core_inact_act_orb)) + + !$OMP DO + do k=1,mo_num + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + f(p,j,l) = bielec_PxxQ_no(j,n_core_inact_orb+p,l,k) + end do + end do + end do + call dgemm('T','N',n_act_orb,mo_num*n_core_inact_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do l=1,n_core_inact_act_orb + do j=1,mo_num + do p=1,n_act_orb + bielec_PxxQ_no(j,n_core_inact_orb+p,l,k)=d(p,j,l) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + deallocate(f,d) + + allocate(f(mo_num,n_core_inact_act_orb,n_act_orb), & + d(mo_num,n_core_inact_act_orb,n_act_orb) ) + + !$OMP DO + do k=1,mo_num + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + f(j,l,p) = bielec_PxxQ_no(j,l,n_core_inact_orb+p,k) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do l=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,l,n_core_inact_orb+p,k)=d(j,l,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + + !$OMP BARRIER + + !$OMP DO + do l=1,n_core_inact_act_orb + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + f(j,k,p) = bielec_PxxQ_no(j,k,l,n_core_inact_orb+p) + end do + end do + end do + call dgemm('N','N',mo_num*n_core_inact_act_orb,n_act_orb,n_act_orb,1.d0, & + f, mo_num*n_core_inact_act_orb, & + natorbsCI, size(natorbsCI,1), & + 0.d0, & + d, mo_num*n_core_inact_act_orb) + do p=1,n_act_orb + do k=1,n_core_inact_act_orb + do j=1,mo_num + bielec_PxxQ_no(j,k,l,n_core_inact_orb+p)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO NOWAIT + deallocate(f,d) + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, bielecCI_no, (n_act_orb,n_act_orb,n_act_orb, mo_num)] + BEGIN_DOC + ! integrals (tu|vp) in the basis of natural MOs + ! index p runs over the whole basis, t,u,v only over the active orbitals + END_DOC + implicit none + integer :: i,j,k,l,t,u,p,q + double precision, allocatable :: f(:,:,:), d(:,:,:) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(j,k,l,p,d,f) & + !$OMP SHARED(n_core_inact_act_orb,mo_num,n_act_orb,n_core_inact_orb, & + !$OMP bielecCI_no,bielecCI,list_act,natorbsCI) + + allocate (f(n_act_orb,n_act_orb,mo_num), & + d(n_act_orb,n_act_orb,mo_num)) + + !$OMP DO + do l=1,mo_num + bielecCI_no(:,:,:,l) = bielecCI(:,:,:,l) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(p,j,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, size(natorbsCI,1), & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do j=1,n_act_orb + do p=1,n_act_orb + bielecCI_no(p,j,k,l)=d(p,j,k) + end do + end do + + do j=1,n_act_orb + do p=1,n_act_orb + f(p,j,k)=bielecCI_no(j,p,k,l) + end do + end do + end do + call dgemm('T','N',n_act_orb,n_act_orb*n_act_orb,n_act_orb,1.d0, & + natorbsCI, n_act_orb, & + f, n_act_orb, & + 0.d0, & + d, n_act_orb) + do k=1,n_act_orb + do p=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,p,k,l)=d(p,j,k) + end do + end do + end do + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,p,l) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,p,l)=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + !$OMP DO + do l=1,n_act_orb + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + f(j,k,p)=bielecCI_no(j,k,l,list_act(p)) + end do + end do + end do + call dgemm('N','N',n_act_orb*n_act_orb,n_act_orb,n_act_orb,1.d0, & + f, n_act_orb*n_act_orb, & + natorbsCI, n_act_orb, & + 0.d0, & + d, n_act_orb*n_act_orb) + + do p=1,n_act_orb + do k=1,n_act_orb + do j=1,n_act_orb + bielecCI_no(j,k,l,list_act(p))=d(j,k,p) + end do + end do + end do + end do + !$OMP END DO + + deallocate(d,f) + !$OMP END PARALLEL + + +END_PROVIDER + diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f new file mode 100644 index 00000000..02954ebf --- /dev/null +++ b/src/casscf_cipsi/casscf.irp.f @@ -0,0 +1,110 @@ +program casscf + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + call reorder_orbitals_for_casscf +! no_vvvv_integrals = .True. +! touch no_vvvv_integrals + n_det_max_full = 500 + touch n_det_max_full + pt2_relative_error = 0.04 + touch pt2_relative_error +! call run_stochastic_cipsi + call run +end + +subroutine run + implicit none + double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E + logical :: converged,state_following_casscf_cipsi_save + integer :: iteration + converged = .False. + + energy = 0.d0 + mo_label = "MCSCF" + iteration = 1 + state_following_casscf_cipsi_save = state_following_casscf + state_following_casscf = .True. + touch state_following_casscf + ept2_before = 0.d0 + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif + do while (.not.converged) + print*,'pt2_max = ',pt2_max + call run_stochastic_cipsi + energy_old = energy + energy = eone+etwo+ecore + pt2_max_before = pt2_max + + call write_time(6) + call write_int(6,iteration,'CAS-SCF iteration = ') + call write_double(6,energy,'CAS-SCF energy = ') + if(n_states == 1)then + double precision :: E_PT2, PT2 + call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) + call ezfio_get_casscf_cipsi_energy(PT2) + PT2 -= E_PT2 + call write_double(6,E_PT2,'E + PT2 energy = ') + call write_double(6,PT2,' PT2 = ') + call write_double(6,pt2_max,' PT2_MAX = ') + endif + + print*,'' + call write_double(6,norm_grad_vec2,'Norm of gradients = ') + call write_double(6,norm_grad_vec2_tab(1), ' Core-active gradients = ') + call write_double(6,norm_grad_vec2_tab(2), ' Core-virtual gradients = ') + call write_double(6,norm_grad_vec2_tab(3), ' Active-virtual gradients = ') + print*,'' + call write_double(6,energy_improvement, 'Predicted energy improvement = ') + + if(criterion_casscf == "energy")then + converged = dabs(energy_improvement) < thresh_scf + else if (criterion_casscf == "gradients")then + converged = norm_grad_vec2 < thresh_scf + else if (criterion_casscf == "e_pt2")then + delta_E = dabs(E_PT2 - ept2_before) + converged = dabs(delta_E) < thresh_casscf + endif + ept2_before = E_PT2 + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif + endif + print*,'' + call write_double(6,pt2_max, 'PT2_MAX for next iteration = ') + + mo_coef = NewOrbs + mo_occ = occnum + call save_mos + if(.not.converged)then + iteration += 1 + if(norm_grad_vec2.gt.0.01d0)then + N_det = N_states + else + N_det = max(N_det/8 ,N_states) + endif + psi_det = psi_det_sorted + psi_coef = psi_coef_sorted + read_wf = .True. + call clear_mo_map + SOFT_TOUCH mo_coef N_det psi_det psi_coef + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif + if(iteration .gt. 3)then + state_following_casscf = state_following_casscf_cipsi_save + soft_touch state_following_casscf + endif + endif + + enddo + +end + + diff --git a/src/casscf_cipsi/class.irp.f b/src/casscf_cipsi/class.irp.f new file mode 100644 index 00000000..7360a661 --- /dev/null +++ b/src/casscf_cipsi/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the CAS case, all those are always false except do_only_cas + END_DOC + do_only_cas = .True. + do_only_1h1p = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/casscf_cipsi/dav_sx_mat.irp.f b/src/casscf_cipsi/dav_sx_mat.irp.f new file mode 100644 index 00000000..1e24f0e2 --- /dev/null +++ b/src/casscf_cipsi/dav_sx_mat.irp.f @@ -0,0 +1,45 @@ + + +subroutine davidson_diag_sx_mat(N_st, u_in, energies) + implicit none + integer, intent(in) :: N_st + double precision, intent(out) :: u_in(nMonoEx+1,n_states_diag), energies(N_st) + integer :: i,j,N_st_tmp, dim_in, sze, N_st_diag_in + integer, allocatable :: list_guess(:) + double precision, allocatable :: H_jj(:) + logical :: converged + N_st_diag_in = n_states_diag + provide SXmatrix + sze = nMonoEx+1 + dim_in = sze + allocate(H_jj(sze), list_guess(sze)) + H_jj(1) = 0.d0 + N_st_tmp = 1 + list_guess(1) = 1 + do j = 2, nMonoEx+1 + H_jj(j) = SXmatrix(j,j) + if(H_jj(j).lt.0.d0)then + list_guess(N_st_tmp) = j + N_st_tmp += 1 + endif + enddo + if(N_st_tmp .ne. N_st)then + print*,'Pb in davidson_diag_sx_mat' + print*,'N_st_tmp .ne. N_st' + print*,N_st_tmp, N_st + stop + endif + print*,'Number of possibly interesting states = ',N_st + print*,'Corresponding diagonal elements of the SX matrix ' + u_in = 0.d0 + do i = 1, min(N_st, N_st_diag_in) +! do i = 1, N_st + j = list_guess(i) + print*,'i,j',i,j + print*,'SX(i,i) = ',H_jj(j) + u_in(j,i) = 1.d0 + enddo + call davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,converged,SXmatrix) + print*,'energies = ',energies + +end diff --git a/src/casscf_cipsi/densities.irp.f b/src/casscf_cipsi/densities.irp.f new file mode 100644 index 00000000..bebcf5d7 --- /dev/null +++ b/src/casscf_cipsi/densities.irp.f @@ -0,0 +1,67 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, D0tu, (n_act_orb,n_act_orb) ] + implicit none + BEGIN_DOC + ! the first-order density matrix in the basis of the starting MOs. + ! matrix is state averaged. + END_DOC + integer :: t,u + + do u=1,n_act_orb + do t=1,n_act_orb + D0tu(t,u) = one_e_dm_mo_alpha_average( list_act(t), list_act(u) ) + & + one_e_dm_mo_beta_average ( list_act(t), list_act(u) ) + enddo + enddo + +END_PROVIDER + +BEGIN_PROVIDER [real*8, P0tuvx, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! The second-order density matrix in the basis of the starting MOs ONLY IN THE RANGE OF ACTIVE MOS + ! The values are state averaged + ! + ! We use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + ! P0tuvx(p,q,r,s) = chemist notation : 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + END_DOC + implicit none + integer :: t,u,v,x + integer :: tt,uu,vv,xx + integer :: mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing the 2 body RDM on the active part' + endif + + P0tuvx= 0.d0 + if(fast_2rdm)then + do istate=1,N_states + do x = 1, n_act_orb + do v = 1, n_act_orb + do u = 1, n_act_orb + do t = 1, n_act_orb + ! 1 1 2 2 1 2 1 2 + P0tuvx(t,u,v,x) = 0.5d0 * state_av_act_2_rdm_spin_trace_mo(t,v,u,x) + enddo + enddo + enddo + enddo + enddo + else + P0tuvx = P0tuvx_peter + endif + +END_PROVIDER diff --git a/src/casscf_cipsi/densities_peter.irp.f b/src/casscf_cipsi/densities_peter.irp.f new file mode 100644 index 00000000..ee7414da --- /dev/null +++ b/src/casscf_cipsi/densities_peter.irp.f @@ -0,0 +1,150 @@ +use bitmasks + +BEGIN_PROVIDER [real*8, P0tuvx_peter, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] + BEGIN_DOC + ! the second-order density matrix in the basis of the starting MOs + ! matrices are state averaged + ! + ! we use the spin-free generators of mono-excitations + ! E_pq destroys q and creates p + ! D_pq = <0|E_pq|0> = D_qp + ! P_pqrs = 1/2 <0|E_pq E_rs - delta_qr E_ps|0> + ! + END_DOC + implicit none + integer :: t,u,v,x,mu,nu,istate,ispin,jspin,ihole,ipart,jhole,jpart + integer :: ierr + real*8 :: phase1,phase11,phase12,phase2,phase21,phase22 + integer :: nu1,nu2,nu11,nu12,nu21,nu22 + integer :: ierr1,ierr2,ierr11,ierr12,ierr21,ierr22 + real*8 :: cI_mu(N_states),term + integer(bit_kind), dimension(N_int,2) :: det_mu, det_mu_ex + integer(bit_kind), dimension(N_int,2) :: det_mu_ex1, det_mu_ex11, det_mu_ex12 + integer(bit_kind), dimension(N_int,2) :: det_mu_ex2, det_mu_ex21, det_mu_ex22 + + if (bavard) then + write(6,*) ' providing density matrix P0' + endif + + P0tuvx_peter = 0.d0 + + ! first loop: we apply E_tu, once for D_tu, once for -P_tvvu + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do t=1,n_act_orb + ipart=list_act(t) + do u=1,n_act_orb + ihole=list_act(u) + ! apply E_tu + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! det_mu_ex1 is in the list + if (nu1.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu1,istate)*phase1 + ! and we fill P0_tvvu + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + ! det_mu_ex2 is in the list + if (nu2.ne.-1) then + do istate=1,n_states + term=cI_mu(istate)*psi_coef(nu2,istate)*phase2 + do v=1,n_act_orb + P0tuvx_peter(t,v,v,u)-=term + end do + end do + end if + end do + end do + end do + ! now we do the double excitation E_tu E_vx |0> + do mu=1,n_det + call det_extract(det_mu,mu,N_int) + do istate=1,n_states + cI_mu(istate)=psi_coef(mu,istate) + end do + do v=1,n_act_orb + ipart=list_act(v) + do x=1,n_act_orb + ihole=list_act(x) + ! apply E_vx + call det_copy(det_mu,det_mu_ex1,N_int) + call det_copy(det_mu,det_mu_ex2,N_int) + call do_spinfree_mono_excitation(det_mu,det_mu_ex1 & + ,det_mu_ex2,nu1,nu2,ihole,ipart,phase1,phase2,ierr1,ierr2) + ! we apply E_tu to the first resultant determinant, thus E_tu E_vx |0> + if (ierr1.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex1,det_mu_ex11,N_int) + call det_copy(det_mu_ex1,det_mu_ex12,N_int) + call do_spinfree_mono_excitation(det_mu_ex1,det_mu_ex11& + ,det_mu_ex12,nu11,nu12,jhole,jpart,phase11,phase12,ierr11,ierr12) + if (nu11.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu11,istate)& + *phase11*phase1 + end do + end if + if (nu12.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu12,istate)& + *phase12*phase1 + end do + end if + end do + end do + end if + + ! we apply E_tu to the second resultant determinant + if (ierr2.eq.1) then + do t=1,n_act_orb + jpart=list_act(t) + do u=1,n_act_orb + jhole=list_act(u) + call det_copy(det_mu_ex2,det_mu_ex21,N_int) + call det_copy(det_mu_ex2,det_mu_ex22,N_int) + call do_spinfree_mono_excitation(det_mu_ex2,det_mu_ex21& + ,det_mu_ex22,nu21,nu22,jhole,jpart,phase21,phase22,ierr21,ierr22) + if (nu21.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu21,istate)& + *phase21*phase2 + end do + end if + if (nu22.ne.-1) then + do istate=1,n_states + P0tuvx_peter(t,u,v,x)+=cI_mu(istate)*psi_coef(nu22,istate)& + *phase22*phase2 + end do + end if + end do + end do + end if + + end do + end do + end do + + ! we average by just dividing by the number of states + do x=1,n_act_orb + do v=1,n_act_orb + do u=1,n_act_orb + do t=1,n_act_orb + P0tuvx_peter(t,u,v,x)*=0.5D0/dble(N_states) + end do + end do + end do + end do + +END_PROVIDER diff --git a/src/casscf_cipsi/det_manip.irp.f b/src/casscf_cipsi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_cipsi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_cipsi/driver_optorb.irp.f b/src/casscf_cipsi/driver_optorb.irp.f new file mode 100644 index 00000000..2e3e02dc --- /dev/null +++ b/src/casscf_cipsi/driver_optorb.irp.f @@ -0,0 +1,3 @@ +subroutine driver_optorb + implicit none +end diff --git a/src/casscf_cipsi/get_energy.irp.f b/src/casscf_cipsi/get_energy.irp.f new file mode 100644 index 00000000..cfb26b59 --- /dev/null +++ b/src/casscf_cipsi/get_energy.irp.f @@ -0,0 +1,51 @@ +program print_2rdm + implicit none + BEGIN_DOC + ! get the active part of the bielectronic energy on a given wave function. + ! + ! useful to test the active part of the spin trace 2 rdms + END_DOC +!no_vvvv_integrals = .True. + read_wf = .True. +!touch read_wf no_vvvv_integrals +!call routine +!call routine_bis + call print_grad +end + +subroutine print_grad + implicit none + integer :: i + do i = 1, nMonoEx + if(dabs(gradvec2(i)).gt.1.d-5)then + print*,'' + print*,i,gradvec2(i),excit(:,i) + endif + enddo +end + +subroutine routine + integer :: i,j,k,l + integer :: ii,jj,kk,ll + double precision :: accu(4),twodm,thr,act_twodm2,integral,get_two_e_integral + thr = 1.d-10 + + + accu = 0.d0 + do ll = 1, n_act_orb + l = list_act(ll) + do kk = 1, n_act_orb + k = list_act(kk) + do jj = 1, n_act_orb + j = list_act(jj) + do ii = 1, n_act_orb + i = list_act(ii) + integral = get_two_e_integral(i,j,k,l,mo_integrals_map) + accu(1) += state_av_act_2_rdm_spin_trace_mo(ii,jj,kk,ll) * integral + enddo + enddo + enddo + enddo + print*,'accu = ',accu(1) + +end diff --git a/src/casscf_cipsi/grad_old.irp.f b/src/casscf_cipsi/grad_old.irp.f new file mode 100644 index 00000000..d60a60c8 --- /dev/null +++ b/src/casscf_cipsi/grad_old.irp.f @@ -0,0 +1,74 @@ + +BEGIN_PROVIDER [real*8, gradvec_old, (nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate + real*8 :: res + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + call calc_grad_elem(ihole,ipart,res) + gradvec_old(indx)=res + end do + + real*8 :: norm_grad + norm_grad=0.d0 + do indx=1,nMonoEx + norm_grad+=gradvec_old(indx)*gradvec_old(indx) + end do + norm_grad=sqrt(norm_grad) + if (bavard) then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via <0|EH|0>) : ', norm_grad + write(6,*) + endif + + +END_PROVIDER + +subroutine calc_grad_elem(ihole,ipart,res) + BEGIN_DOC + ! eq 18 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 , q=hole, p=particle + END_DOC + implicit none + integer :: ihole,ipart,mu,iii,ispin,ierr,nu,istate + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: i_H_psi_array(N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res=0.D0 + + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + if (ierr.eq.1) then + call i_H_psi(det_mu_ex,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase + end do + end if + end do + end do + + ! state-averaged gradient + res*=2.D0/dble(N_states) + +end subroutine calc_grad_elem + diff --git a/src/casscf_cipsi/gradient.irp.f b/src/casscf_cipsi/gradient.irp.f new file mode 100644 index 00000000..a1c5e947 --- /dev/null +++ b/src/casscf_cipsi/gradient.irp.f @@ -0,0 +1,215 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + + if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, gradvec2, (nMonoEx)] +&BEGIN_PROVIDER [real*8, norm_grad_vec2] +&BEGIN_PROVIDER [real*8, norm_grad_vec2_tab, (3)] + BEGIN_DOC + ! calculate the orbital gradient from density + ! matrices and integrals; Siegbahn et al, Phys Scr 1980 + ! eqs 14 a,b,c + END_DOC + implicit none + integer :: i,t,a,indx + real*8 :: gradvec_it,gradvec_ia,gradvec_ta + + indx=0 + norm_grad_vec2_tab = 0.d0 + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx+=1 + gradvec2(indx)=gradvec_it(i,t) + norm_grad_vec2_tab(1) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do i=1,n_core_inact_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ia(i,a) + norm_grad_vec2_tab(2) += gradvec2(indx)*gradvec2(indx) + end do + end do + + do t=1,n_act_orb + do a=1,n_virt_orb + indx+=1 + gradvec2(indx)=gradvec_ta(t,a) + norm_grad_vec2_tab(3) += gradvec2(indx)*gradvec2(indx) + end do + end do + + norm_grad_vec2=0.d0 + do indx=1,nMonoEx + norm_grad_vec2+=gradvec2(indx)*gradvec2(indx) + end do + do i = 1, 3 + norm_grad_vec2_tab(i) = dsqrt(norm_grad_vec2_tab(i)) + enddo + norm_grad_vec2=sqrt(norm_grad_vec2) + if(bavard)then + write(6,*) + write(6,*) ' Norm of the orbital gradient (via D, P and integrals): ', norm_grad_vec2 + write(6,*) + endif + +END_PROVIDER + +real*8 function gradvec_it(i,t) + BEGIN_DOC + ! the orbital gradient core/inactive -> active + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t + + integer :: ii,tt,v,vv,x,y + integer :: x3,y3 + + ii=list_core_inact(i) + tt=list_act(t) + gradvec_it=2.D0*(Fipq(tt,ii)+Fapq(tt,ii)) + gradvec_it-=occnum(tt)*Fipq(ii,tt) + do v=1,n_act_orb ! active + vv=list_act(v) + do x=1,n_act_orb ! active + x3=x+n_core_inact_orb ! list_act(x) + do y=1,n_act_orb ! active + y3=y+n_core_inact_orb ! list_act(y) + ! Gamma(2) a a a a 1/r12 i a a a + gradvec_it-=2.D0*P0tuvx_no(t,v,x,y)*bielec_PQxx_no(ii,vv,x3,y3) + end do + end do + end do + gradvec_it*=2.D0 +end function gradvec_it + +real*8 function gradvec_ia(i,a) + BEGIN_DOC + ! the orbital gradient core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,ii,aa + + ii=list_core_inact(i) + aa=list_virt(a) + gradvec_ia=2.D0*(Fipq(aa,ii)+Fapq(aa,ii)) + gradvec_ia*=2.D0 + +end function gradvec_ia + +real*8 function gradvec_ta(t,a) + BEGIN_DOC + ! the orbital gradient active -> virtual + ! we assume natural orbitals + END_DOC + implicit none + integer :: t,a,tt,aa,v,vv,x,y + + tt=list_act(t) + aa=list_virt(a) + gradvec_ta=0.D0 + gradvec_ta+=occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + gradvec_ta+=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + gradvec_ta*=2.D0 + +end function gradvec_ta + diff --git a/src/casscf_cipsi/hessian.irp.f b/src/casscf_cipsi/hessian.irp.f new file mode 100644 index 00000000..458c6aa6 --- /dev/null +++ b/src/casscf_cipsi/hessian.irp.f @@ -0,0 +1,539 @@ +use bitmasks + +real*8 function hessmat_itju(i,t,j,u) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> active + ! i, t, j, u are list indices, the corresponding orbitals are ii,tt,jj,uu + ! + ! we assume natural orbitals + END_DOC + implicit none + integer :: i,t,j,u,ii,tt,uu,v,vv,x,xx,y,jj + real*8 :: term,t2 + + ii=list_core_inact(i) + tt=list_act(t) + if (i.eq.j) then + if (t.eq.u) then + ! diagonal element + term=occnum(tt)*Fipq(ii,ii)+2.D0*(Fipq(tt,tt)+Fapq(tt,tt)) & + -2.D0*(Fipq(ii,ii)+Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(tt,i,i,tt)-bielec_pqxx_no(tt,tt,i,i)) + term-=2.D0*occnum(tt)*(3.D0*bielec_pxxq_no(tt,i,i,tt) & + -bielec_pqxx_no(tt,tt,i,i)) + term-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + else + ! it/iu, t != u + uu=list_act(u) + term=2.D0*(Fipq(tt,uu)+Fapq(tt,uu)) + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=occnum(tt)*Fipq(uu,tt) + term-=(occnum(tt)+occnum(uu)) & + *(3.D0*bielec_PxxQ_no(tt,i,i,uu)-bielec_PQxx_no(uu,tt,i,i)) + do v=1,n_act_orb + vv=list_act(v) + ! term-=D0tu(u,v)*Fipq(tt,vv) ! published, but inverting t and u seems more correct + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,i) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,i,xx)) + do y=1,n_act_orb + term-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(u,v,y,xx) + end do + end do + end do + end if + else + ! it/ju + jj=list_core_inact(j) + uu=list_act(u) + if (t.eq.u) then + term=occnum(tt)*Fipq(ii,jj) + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + else + term=0.D0 + end if + term+=2.D0*(4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(tt,uu,i,j)) + term-=(occnum(tt)+occnum(uu))* & + (4.D0*bielec_PxxQ_no(tt,i,j,uu)-bielec_PxxQ_no(uu,i,j,tt) & + -bielec_PQxx_no(uu,tt,i,j)) + do v=1,n_act_orb + vv=list_act(v) + do x=1,n_act_orb + xx=list_act(x) + term+=2.D0*(P0tuvx_no(u,t,v,x)*bielec_pqxx_no(vv,xx,i,j) & + +(P0tuvx_no(u,x,v,t)+P0tuvx_no(u,x,t,v)) & + *bielec_pxxq_no(vv,i,j,xx)) + end do + end do + end if + + term*=2.D0 + hessmat_itju=term + +end function hessmat_itju + +real*8 function hessmat_itja(i,t,j,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, core/inactive -> virtual + END_DOC + implicit none + integer :: i,t,j,a,ii,tt,jj,aa,v,vv,x,y + real*8 :: term + + ! it/ja + ii=list_core_inact(i) + tt=list_act(t) + jj=list_core_inact(j) + aa=list_virt(a) + term=2.D0*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + term-=occnum(tt)*(4.D0*bielec_pxxq_no(aa,j,i,tt) & + -bielec_pqxx_no(aa,tt,i,j) -bielec_pxxq_no(aa,i,j,tt)) + if (i.eq.j) then + term+=2.D0*(Fipq(aa,tt)+Fapq(aa,tt)) + term-=0.5D0*occnum(tt)*Fipq(aa,tt) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,aa) + end do + end do + end do + end if + term*=2.D0 + hessmat_itja=term + +end function hessmat_itja + +real*8 function hessmat_itua(i,t,u,a) + BEGIN_DOC + ! the orbital hessian for core/inactive -> active, active -> virtual + END_DOC + implicit none + integer :: i,t,u,a,ii,tt,uu,aa,v,vv,x,xx,u3,t3,v3 + real*8 :: term + + ii=list_core_inact(i) + tt=list_act(t) + t3=t+n_core_inact_orb + uu=list_act(u) + u3=u+n_core_inact_orb + aa=list_virt(a) + if (t.eq.u) then + term=-occnum(tt)*Fipq(aa,ii) + else + term=0.D0 + end if + term-=occnum(uu)*(bielec_pqxx_no(aa,ii,t3,u3)-4.D0*bielec_pqxx_no(aa,uu,t3,i)& + +bielec_pxxq_no(aa,t3,u3,ii)) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + integer :: x3 + xx=list_act(x) + x3=x+n_core_inact_orb + term-=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,ii,v3,x3) & + +(P0tuvx_no(t,v,u,x)+P0tuvx_no(t,v,x,u)) & + *bielec_pqxx_no(aa,xx,v3,i)) + end do + end do + if (t.eq.u) then + term+=Fipq(aa,ii)+Fapq(aa,ii) + end if + term*=2.D0 + hessmat_itua=term + +end function hessmat_itua + +real*8 function hessmat_iajb(i,a,j,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, core/inactive -> virtual + END_DOC + implicit none + integer :: i,a,j,b,ii,aa,jj,bb + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + if (i.eq.j) then + if (a.eq.b) then + ! ia/ia + term=2.D0*(Fipq(aa,aa)+Fapq(aa,aa)-Fipq(ii,ii)-Fapq(ii,ii)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,aa)-bielec_pqxx_no(aa,aa,i,i)) + else + bb=list_virt(b) + ! ia/ib + term=2.D0*(Fipq(aa,bb)+Fapq(aa,bb)) + term+=2.D0*(3.D0*bielec_pxxq_no(aa,i,i,bb)-bielec_pqxx_no(aa,bb,i,i)) + end if + else + ! ia/jb + jj=list_core_inact(j) + bb=list_virt(b) + term=2.D0*(4.D0*bielec_pxxq_no(aa,i,j,bb)-bielec_pqxx_no(aa,bb,i,j) & + -bielec_pxxq_no(aa,j,i,bb)) + if (a.eq.b) then + term-=2.D0*(Fipq(ii,jj)+Fapq(ii,jj)) + end if + end if + term*=2.D0 + hessmat_iajb=term + +end function hessmat_iajb + +real*8 function hessmat_iatb(i,a,t,b) + BEGIN_DOC + ! the orbital hessian for core/inactive -> virtual, active -> virtual + END_DOC + implicit none + integer :: i,a,t,b,ii,aa,tt,bb,v,vv,x,y,v3,t3 + real*8 :: term + + ii=list_core_inact(i) + aa=list_virt(a) + tt=list_act(t) + bb=list_virt(b) + t3=t+n_core_inact_orb + term=occnum(tt)*(4.D0*bielec_pxxq_no(aa,i,t3,bb)-bielec_pxxq_no(aa,t3,i,bb)& + -bielec_pqxx_no(aa,bb,i,t3)) + if (a.eq.b) then + term-=Fipq(tt,ii)+Fapq(tt,ii) + term-=0.5D0*occnum(tt)*Fipq(tt,ii) + do v=1,n_act_orb + do x=1,n_act_orb + do y=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,ii) + end do + end do + end do + end if + term*=2.D0 + hessmat_iatb=term + +end function hessmat_iatb + +real*8 function hessmat_taub(t,a,u,b) + BEGIN_DOC + ! the orbital hessian for act->virt,act->virt + END_DOC + implicit none + integer :: t,a,u,b,tt,aa,uu,bb,v,vv,x,xx,y + integer :: v3,x3 + real*8 :: term,t1,t2,t3 + + tt=list_act(t) + aa=list_virt(a) + if (t == u) then + if (a == b) then + ! ta/ta + t1=occnum(tt)*Fipq(aa,aa) + t2=0.D0 + t3=0.D0 + t1-=occnum(tt)*Fipq(tt,tt) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + t2+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,aa,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v))* & + bielec_pxxq_no(aa,x3,v3,aa)) + do y=1,n_act_orb + t3-=2.D0*P0tuvx_no(t,v,x,y)*bielecCI_no(t,v,y,xx) + end do + end do + end do + term=t1+t2+t3 + else + bb=list_virt(b) + ! ta/tb b/=a + term=occnum(tt)*Fipq(aa,bb) + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,t,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,t)+P0tuvx_no(t,x,t,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + end if + else + ! ta/ub t/=u + uu=list_act(u) + bb=list_virt(b) + term=0.D0 + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + term+=2.D0*(P0tuvx_no(t,u,v,x)*bielec_pqxx_no(aa,bb,v3,x3) & + +(P0tuvx_no(t,x,v,u)+P0tuvx_no(t,x,u,v)) & + *bielec_pxxq_no(aa,x3,v3,bb)) + end do + end do + if (a.eq.b) then + term-=0.5D0*(occnum(tt)*Fipq(uu,tt)+occnum(uu)*Fipq(tt,uu)) + do v=1,n_act_orb + do y=1,n_act_orb + do x=1,n_act_orb + term-=P0tuvx_no(t,v,x,y)*bielecCI_no(x,y,v,uu) + term-=P0tuvx_no(u,v,x,y)*bielecCI_no(x,y,v,tt) + end do + end do + end do + end if + + end if + + term*=2.D0 + hessmat_taub=term + +end function hessmat_taub + +BEGIN_PROVIDER [real*8, hessdiag, (nMonoEx)] + BEGIN_DOC + ! the diagonal of the Hessian, needed for the Davidson procedure + END_DOC + implicit none + integer :: i,t,a,indx,indx_shift + real*8 :: hessmat_itju,hessmat_iajb,hessmat_taub + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessdiag,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,t,a,indx_shift) + + !$OMP DO + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + hessdiag(indx)=hessmat_itju(i,t,i,t) + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_iajb(i,a,i,a) + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + hessdiag(indx)=hessmat_taub(t,a,t,a) + end do + end do + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + + +BEGIN_PROVIDER [double precision, hessmat, (nMonoEx,nMonoEx)] + implicit none + integer :: i,j,t,u,a,b + integer :: indx,indx_tmp, jndx, jndx_tmp + integer :: ustart,bstart + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + ! c-a c-v a-v + ! c-a | X X X + ! c-v | X X + ! a-v | X + + provide mo_two_e_integrals_in_map + + hessmat = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,list_idx_c_a,n_core_inact_orb,n_act_orb,mat_idx_c_a) & + !$OMP PRIVATE(indx_tmp,indx,i,t,j,u,ustart,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-active > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-active excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + jndx = mat_idx_c_a(j,u) + hessmat(jndx,indx) = hessmat_itju(i,t,j,u) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_c_v_prov,list_idx_c_a,list_idx_c_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,j,a,jndx) + + !$OMP DO +!!!! < Core-active| H |Core-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! Core-VIRTUAL excitations + do jndx_tmp = 1, n_c_v_prov + jndx = list_idx_c_v(1,jndx_tmp) + j = list_idx_c_v(2,jndx_tmp) + a = list_idx_c_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itja(i,t,j,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_a_prov,n_a_v_prov,list_idx_c_a,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,t,u,a,jndx) + + !$OMP DO +!!!! < Core-active| H |ACTIVE-VIRTUAL > + ! Core-active excitations + do indx_tmp = 1, n_c_a_prov + indx = list_idx_c_a(1,indx_tmp) + i = list_idx_c_a(2,indx_tmp) + t = list_idx_c_a(3,indx_tmp) + ! ACTIVE-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + u = list_idx_a_v(2,jndx_tmp) + a = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_itua(i,t,u,a) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + if(hess_cv_cv)then + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,list_idx_c_v,n_core_inact_orb,n_virt_orb,mat_idx_c_v) & + !$OMP PRIVATE(indx_tmp,indx,i,a,j,b,bstart,jndx) + !$OMP DO +!!!!! < Core-VIRTUAL | H |Core-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Core-VIRTUAL excitations + do j = 1, n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_c_v(j,b) + hessmat(jndx,indx) = hessmat_iajb(i,a,j,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + + !$OMP END DO NOWAIT + !$OMP END PARALLEL + endif + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_c_v_prov,n_a_v_prov,list_idx_c_v,list_idx_a_v) & + !$OMP PRIVATE(indx_tmp,jndx_tmp,indx,i,a,t,b,jndx) + + !$OMP DO +!!!! < Core-VIRTUAL | H |Active-VIRTUAL > + ! Core-VIRTUAL excitations + do indx_tmp = 1, n_c_v_prov + indx = list_idx_c_v(1,indx_tmp) + i = list_idx_c_v(2,indx_tmp) + a = list_idx_c_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do jndx_tmp = 1, n_a_v_prov + jndx = list_idx_a_v(1,jndx_tmp) + t = list_idx_a_v(2,jndx_tmp) + b = list_idx_a_v(3,jndx_tmp) + hessmat(jndx,indx) = hessmat_iatb(i,a,t,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat,n_a_v_prov,list_idx_a_v,n_act_orb,n_virt_orb,mat_idx_a_v) & + !$OMP PRIVATE(indx_tmp,indx,t,a,u,b,bstart,jndx) + + !$OMP DO +!!!! < Active-VIRTUAL | H |Active-VIRTUAL > + ! Active-VIRTUAL excitations + do indx_tmp = 1, n_a_v_prov + indx = list_idx_a_v(1,indx_tmp) + t = list_idx_a_v(2,indx_tmp) + a = list_idx_a_v(3,indx_tmp) + ! Active-VIRTUAL excitations + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + jndx = mat_idx_a_v(u,b) + hessmat(jndx,indx) = hessmat_taub(t,a,u,b) + hessmat(indx,jndx) = hessmat(jndx,indx) + enddo + enddo + enddo + !$OMP END DO NOWAIT + !$OMP END PARALLEL + +END_PROVIDER diff --git a/src/casscf_cipsi/hessian_old.irp.f b/src/casscf_cipsi/hessian_old.irp.f new file mode 100644 index 00000000..d17f1f0a --- /dev/null +++ b/src/casscf_cipsi/hessian_old.irp.f @@ -0,0 +1,310 @@ + +use bitmasks +BEGIN_PROVIDER [real*8, hessmat_old, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! calculate the orbital hessian 2 + ! + + by hand, + ! determinant per determinant, as for the gradient + ! + ! we assume that we have natural active orbitals + END_DOC + implicit none + integer :: indx,ihole,ipart + integer :: jndx,jhole,jpart + character*3 :: iexc,jexc + real*8 :: res + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_old ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + + do indx=1,nMonoEx + do jndx=1,nMonoEx + hessmat_old(indx,jndx)=0.D0 + end do + end do + + do indx=1,nMonoEx + ihole=excit(1,indx) + ipart=excit(2,indx) + iexc=excit_class(indx) + do jndx=indx,nMonoEx + jhole=excit(1,jndx) + jpart=excit(2,jndx) + jexc=excit_class(jndx) + call calc_hess_elem(ihole,ipart,jhole,jpart,res) + hessmat_old(indx,jndx)=res + hessmat_old(jndx,indx)=res + end do + end do + +END_PROVIDER + +subroutine calc_hess_elem(ihole,ipart,jhole,jpart,res) + BEGIN_DOC + ! eq 19 of Siegbahn et al, Physica Scripta 1980 + ! we calculate 2 + ! + + + ! average over all states is performed. + ! no transition between states. + END_DOC + implicit none + integer :: ihole,ipart,ispin,mu,istate + integer :: jhole,jpart,jspin + integer :: mu_pq, mu_pqrs, mu_rs, mu_rspq, nu_rs,nu + real*8 :: res + integer(bit_kind), allocatable :: det_mu(:,:) + integer(bit_kind), allocatable :: det_nu(:,:) + integer(bit_kind), allocatable :: det_mu_pq(:,:) + integer(bit_kind), allocatable :: det_mu_rs(:,:) + integer(bit_kind), allocatable :: det_nu_rs(:,:) + integer(bit_kind), allocatable :: det_mu_pqrs(:,:) + integer(bit_kind), allocatable :: det_mu_rspq(:,:) + real*8 :: i_H_psi_array(N_states),phase,phase2,phase3 + real*8 :: i_H_j_element + allocate(det_mu(N_int,2)) + allocate(det_nu(N_int,2)) + allocate(det_mu_pq(N_int,2)) + allocate(det_mu_rs(N_int,2)) + allocate(det_nu_rs(N_int,2)) + allocate(det_mu_pqrs(N_int,2)) + allocate(det_mu_rspq(N_int,2)) + integer :: mu_pq_possible + integer :: mu_rs_possible + integer :: nu_rs_possible + integer :: mu_pqrs_possible + integer :: mu_rspq_possible + + res=0.D0 + + ! the terms <0|E E H |0> + do mu=1,n_det + ! get the string of the determinant + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation pq on it + call det_copy(det_mu,det_mu_pq,N_int) + call do_signed_mono_excitation(det_mu,det_mu_pq,mu_pq & + ,ihole,ipart,ispin,phase,mu_pq_possible) + if (mu_pq_possible.eq.1) then + ! possible, but not necessarily in the list + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jhole,jpart,jspin,phase2,mu_pqrs_possible) + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + ! try the de-excitation with opposite sign + call det_copy(det_mu_pq,det_mu_pqrs,N_int) + call do_signed_mono_excitation(det_mu_pq,det_mu_pqrs,mu_pqrs& + ,jpart,jhole,jspin,phase2,mu_pqrs_possible) + phase2=-phase2 + ! excitation possible + if (mu_pqrs_possible.eq.1) then + call i_H_psi(det_mu_pqrs,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase*phase2 + end do + end if + end do + end if + ! exchange the notion of pq and rs + ! do the monoexcitation rs on the initial determinant + call det_copy(det_mu,det_mu_rs,N_int) + call do_signed_mono_excitation(det_mu,det_mu_rs,mu_rs & + ,jhole,jpart,ispin,phase2,mu_rs_possible) + if (mu_rs_possible.eq.1) then + ! do the second excitation + do jspin=1,2 + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ihole,ipart,jspin,phase3,mu_rspq_possible) + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + ! we may try the de-excitation, with opposite sign + call det_copy(det_mu_rs,det_mu_rspq,N_int) + call do_signed_mono_excitation(det_mu_rs,det_mu_rspq,mu_rspq& + ,ipart,ihole,jspin,phase3,mu_rspq_possible) + phase3=-phase3 + ! excitation possible (of course, the result is outside the CAS) + if (mu_rspq_possible.eq.1) then + call i_H_psi(det_mu_rspq,psi_det,psi_coef,N_int & + ,N_det,N_det,N_states,i_H_psi_array) + do istate=1,N_states + res+=i_H_psi_array(istate)*psi_coef(mu,istate)*phase2*phase3 + end do + end if + end do + end if + ! + ! the operator E H E, we have to do a double loop over the determinants + ! we still have the determinant mu_pq and the phase in memory + if (mu_pq_possible.eq.1) then + do nu=1,N_det + call det_extract(det_nu,nu,N_int) + do jspin=1,2 + call det_copy(det_nu,det_nu_rs,N_int) + call do_signed_mono_excitation(det_nu,det_nu_rs,nu_rs & + ,jhole,jpart,jspin,phase2,nu_rs_possible) + ! excitation possible ? + if (nu_rs_possible.eq.1) then + call i_H_j(det_mu_pq,det_nu_rs,N_int,i_H_j_element) + do istate=1,N_states + res+=2.D0*i_H_j_element*psi_coef(mu,istate) & + *psi_coef(nu,istate)*phase*phase2 + end do + end if + end do + end do + end if + end do + end do + + ! state-averaged Hessian + res*=1.D0/dble(N_states) + +end subroutine calc_hess_elem + +BEGIN_PROVIDER [real*8, hessmat_peter, (nMonoEx,nMonoEx)] + BEGIN_DOC + ! explicit hessian matrix from density matrices and integrals + ! of course, this will be used for a direct Davidson procedure later + ! we will not store the matrix in real life + ! formulas are broken down as functions for the 6 classes of matrix elements + ! + END_DOC + implicit none + integer :: i,j,t,u,a,b,indx,jndx,bstart,ustart,indx_shift + + real*8 :: hessmat_itju + real*8 :: hessmat_itja + real*8 :: hessmat_itua + real*8 :: hessmat_iajb + real*8 :: hessmat_iatb + real*8 :: hessmat_taub + + if (bavard) then + write(6,*) ' providing Hessian matrix hessmat_peter ' + write(6,*) ' nMonoEx = ',nMonoEx + endif + provide mo_two_e_integrals_in_map + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(hessmat_peter,n_core_inact_orb,n_act_orb,n_virt_orb,nMonoEx) & + !$OMP PRIVATE(i,indx,jndx,j,ustart,t,u,a,bstart,indx_shift) + + !$OMP DO + ! (DOUBLY OCCUPIED ---> ACT ) + do i=1,n_core_inact_orb + do t=1,n_act_orb + indx = t + (i-1)*n_act_orb + jndx=indx + ! (DOUBLY OCCUPIED ---> ACT ) + do j=i,n_core_inact_orb + if (i.eq.j) then + ustart=t + else + ustart=1 + end if + do u=ustart,n_act_orb + hessmat_peter(jndx,indx)=hessmat_itju(i,t,j,u) + jndx+=1 + end do + end do + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=1,n_core_inact_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itja(i,t,j,a) + jndx+=1 + end do + end do + ! (ACTIVE ---> VIRTUAL) + do u=1,n_act_orb + do a=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_itua(i,t,u,a) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift = n_core_inact_orb*n_act_orb + !$OMP DO + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do a=1,n_virt_orb + do i=1,n_core_inact_orb + indx = a + (i-1)*n_virt_orb + indx_shift + jndx=indx + ! (DOUBLY OCCUPIED ---> VIRTUAL) + do j=i,n_core_inact_orb + if (i.eq.j) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iajb(i,a,j,b) + jndx+=1 + end do + end do + ! (ACT ---> VIRTUAL) + do t=1,n_act_orb + do b=1,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_iatb(i,a,t,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO NOWAIT + + indx_shift += n_core_inact_orb*n_virt_orb + !$OMP DO + ! (ACT ---> VIRTUAL) + do a=1,n_virt_orb + do t=1,n_act_orb + indx = a + (t-1)*n_virt_orb + indx_shift + jndx=indx + ! (ACT ---> VIRTUAL) + do u=t,n_act_orb + if (t.eq.u) then + bstart=a + else + bstart=1 + end if + do b=bstart,n_virt_orb + hessmat_peter(jndx,indx)=hessmat_taub(t,a,u,b) + jndx+=1 + end do + end do + end do + end do + !$OMP END DO + + !$OMP END PARALLEL + + do jndx=1,nMonoEx + do indx=1,jndx-1 + hessmat_peter(indx,jndx) = hessmat_peter(jndx,indx) + enddo + enddo + + +END_PROVIDER + diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f new file mode 100644 index 00000000..e4568405 --- /dev/null +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -0,0 +1,80 @@ +BEGIN_PROVIDER [real*8, Fipq, (mo_num,mo_num) ] + BEGIN_DOC + ! the inactive Fock matrix, in molecular orbitals + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)=one_ints_no(p,q) + end do + end do + + ! the inactive Fock matrix + do k=1,n_core_inact_orb + kk=list_core_inact(k) + do q=1,mo_num + do p=1,mo_num + Fipq(p,q)+=2.D0*bielec_pqxx_no(p,q,k,k) -bielec_pxxq_no(p,k,k,q) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] + BEGIN_DOC + ! the active active Fock matrix, in molecular orbitals + ! we create them in MOs, quite expensive + ! + ! for an implementation in AOs we need first the natural orbitals + ! for forming an active density matrix in AOs + ! + END_DOC + implicit none + integer :: p,q,k,kk,t,tt,u,uu + + Fapq = 0.d0 + + ! the active Fock matrix, D0tu is diagonal + do t=1,n_act_orb + tt=list_act(t) + do q=1,mo_num + do p=1,mo_num + Fapq(p,q)+=occnum(tt) & + *(bielec_pqxx_no(p,q,tt,tt)-0.5D0*bielec_pxxq_no(p,tt,tt,q)) + end do + end do + end do + + if (bavard) then + integer :: i + write(6,*) + write(6,*) ' the effective Fock matrix over MOs' + write(6,*) + + write(6,*) + write(6,*) ' the diagonal of the inactive effective Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) + write(6,*) + write(6,*) + write(6,*) ' the diagonal of the active Fock matrix ' + write(6,'(5(i3,F12.5))') (i,Fapq(i,i),i=1,mo_num) + write(6,*) + end if + + +END_PROVIDER + + diff --git a/src/casscf_cipsi/natorb.irp.f b/src/casscf_cipsi/natorb.irp.f new file mode 100644 index 00000000..9ce90304 --- /dev/null +++ b/src/casscf_cipsi/natorb.irp.f @@ -0,0 +1,231 @@ + BEGIN_PROVIDER [real*8, occnum, (mo_num)] + implicit none + BEGIN_DOC + ! MO occupation numbers + END_DOC + + integer :: i + occnum=0.D0 + do i=1,n_core_inact_orb + occnum(list_core_inact(i))=2.D0 + end do + + do i=1,n_act_orb + occnum(list_act(i))=occ_act(i) + end do + + if (bavard) then + write(6,*) ' occupation numbers ' + do i=1,mo_num + write(6,*) i,occnum(i) + end do + endif + +END_PROVIDER + + + BEGIN_PROVIDER [ real*8, natorbsCI, (n_act_orb,n_act_orb) ] +&BEGIN_PROVIDER [ real*8, occ_act, (n_act_orb) ] + implicit none + BEGIN_DOC + ! Natural orbitals of CI + END_DOC + integer :: i, j + double precision :: Vt(n_act_orb,n_act_orb) + +! call lapack_diag(occ_act,natorbsCI,D0tu,n_act_orb,n_act_orb) + call svd(D0tu, size(D0tu,1), natorbsCI,size(natorbsCI,1), occ_act, Vt, size(Vt,1),n_act_orb,n_act_orb) + + if (bavard) then + write(6,*) ' found occupation numbers as ' + do i=1,n_act_orb + write(6,*) i,occ_act(i) + end do + + integer :: nmx + real*8 :: xmx + do i=1,n_act_orb + ! largest element of the eigenvector should be positive + xmx=0.D0 + nmx=0 + do j=1,n_act_orb + if (abs(natOrbsCI(j,i)).gt.xmx) then + nmx=j + xmx=abs(natOrbsCI(j,i)) + end if + end do + xmx=sign(1.D0,natOrbsCI(nmx,i)) + do j=1,n_act_orb + natOrbsCI(j,i)*=xmx + end do + + write(6,*) ' Eigenvector No ',i + write(6,'(5(I3,F12.5))') (j,natOrbsCI(j,i),j=1,n_act_orb) + end do + end if + +END_PROVIDER + + +BEGIN_PROVIDER [real*8, P0tuvx_no, (n_act_orb,n_act_orb,n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! 4-index transformation of 2part matrices + END_DOC + integer :: i,j,k,l,p,q + real*8 :: d(n_act_orb) + + ! index per index + ! first quarter + P0tuvx_no(:,:,:,:) = P0tuvx(:,:,:,:) + + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(q,j,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(p,j,k,l)=d(p) + end do + end do + end do + end do + ! 2nd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,q,k,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,p,k,l)=d(p) + end do + end do + end do + end do + ! 3rd quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,q,l)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,p,l)=d(p) + end do + end do + end do + end do + ! 4th quarter + do j=1,n_act_orb + do k=1,n_act_orb + do l=1,n_act_orb + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=P0tuvx_no(j,k,l,q)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + P0tuvx_no(j,k,l,p)=d(p) + end do + end do + end do + end do + +END_PROVIDER + + + +BEGIN_PROVIDER [real*8, one_ints_no, (mo_num,mo_num)] + implicit none + BEGIN_DOC + ! Transformed one-e integrals + END_DOC + integer :: i,j, p, q + real*8 :: d(n_act_orb) + one_ints_no(:,:)=mo_one_e_integrals(:,:) + + ! 1st half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(list_act(q),j)*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(list_act(p),j)=d(p) + end do + end do + + ! 2nd half-trf + do j=1,mo_num + do p=1,n_act_orb + d(p)=0.D0 + end do + do p=1,n_act_orb + do q=1,n_act_orb + d(p)+=one_ints_no(j,list_act(q))*natorbsCI(q,p) + end do + end do + do p=1,n_act_orb + one_ints_no(j,list_act(p))=d(p) + end do + end do +END_PROVIDER + + +BEGIN_PROVIDER [ double precision, NatOrbsCI_mos, (mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Rotation matrix from current MOs to the CI natural MOs + END_DOC + integer :: p,q + + NatOrbsCI_mos(:,:) = 0.d0 + + do q = 1,mo_num + NatOrbsCI_mos(q,q) = 1.d0 + enddo + + do q = 1,n_act_orb + do p = 1,n_act_orb + NatOrbsCI_mos(list_act(p),list_act(q)) = natorbsCI(p,q) + enddo + enddo +END_PROVIDER + + +BEGIN_PROVIDER [real*8, NatOrbsFCI, (ao_num,mo_num)] + implicit none + BEGIN_DOC +! FCI natural orbitals + END_DOC + + call dgemm('N','N', ao_num,mo_num,mo_num,1.d0, & + mo_coef, size(mo_coef,1), & + NatOrbsCI_mos, size(NatOrbsCI_mos,1), 0.d0, & + NatOrbsFCI, size(NatOrbsFCI,1)) +END_PROVIDER + diff --git a/src/casscf_cipsi/neworbs.irp.f b/src/casscf_cipsi/neworbs.irp.f new file mode 100644 index 00000000..a7cebbb2 --- /dev/null +++ b/src/casscf_cipsi/neworbs.irp.f @@ -0,0 +1,253 @@ + BEGIN_PROVIDER [real*8, SXmatrix, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [integer, n_guess_sx_mat ] + implicit none + BEGIN_DOC + ! Single-excitation matrix + END_DOC + + integer :: i,j + + do i=1,nMonoEx+1 + do j=1,nMonoEx+1 + SXmatrix(i,j)=0.D0 + end do + end do + + do i=1,nMonoEx + SXmatrix(1,i+1)=gradvec2(i) + SXmatrix(1+i,1)=gradvec2(i) + end do + if(diag_hess_cas)then + do i = 1, nMonoEx + SXmatrix(i+1,i+1) = hessdiag(i) + enddo + else + do i=1,nMonoEx + do j=1,nMonoEx + SXmatrix(i+1,j+1)=hessmat(i,j) + SXmatrix(j+1,i+1)=hessmat(i,j) + end do + end do + endif + + do i = 1, nMonoEx + SXmatrix(i+1,i+1) += level_shift_casscf + enddo + n_guess_sx_mat = 1 + do i = 1, nMonoEx + if(SXmatrix(i+1,i+1).lt.0.d0 )then + n_guess_sx_mat += 1 + endif + enddo + if (bavard) then + do i=2,nMonoEx + write(6,*) ' diagonal of the Hessian : ',i,hessmat(i,i) + end do + end if + +END_PROVIDER + + BEGIN_PROVIDER [real*8, SXeigenvec, (nMonoEx+1,nMonoEx+1)] +&BEGIN_PROVIDER [real*8, SXeigenval, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Eigenvectors/eigenvalues of the single-excitation matrix + END_DOC + if(nMonoEx+1.gt.n_det_max_full)then + if(bavard)then + print*,'Using the Davidson algorithm to diagonalize the SXmatrix' + endif + double precision, allocatable :: u_in(:,:),energies(:) + allocate(u_in(nMonoEx+1,n_states_diag),energies(n_guess_sx_mat)) + call davidson_diag_sx_mat(n_guess_sx_mat, u_in, energies) + integer :: i,j + SXeigenvec = 0.d0 + SXeigenval = 0.d0 + do i = 1, n_guess_sx_mat + SXeigenval(i) = energies(i) + do j = 1, nMonoEx+1 + SXeigenvec(j,i) = u_in(j,i) + enddo + enddo + else + if(bavard)then + print*,'Diagonalize the SXmatrix with Jacobi' + endif + call lapack_diag(SXeigenval,SXeigenvec,SXmatrix,nMonoEx+1,nMonoEx+1) + endif + if (bavard) then + write(6,*) ' SXdiag : lowest eigenvalues ' + write(6,*) ' 1 - ',SXeigenval(1),SXeigenvec(1,1) + if(n_guess_sx_mat.gt.0)then + write(6,*) ' 2 - ',SXeigenval(2),SXeigenvec(1,2) + write(6,*) ' 3 - ',SXeigenval(3),SXeigenvec(1,3) + write(6,*) ' 4 - ',SXeigenval(4),SXeigenvec(1,4) + write(6,*) ' 5 - ',SXeigenval(5),SXeigenvec(1,5) + endif + write(6,*) + write(6,*) ' SXdiag : lowest eigenvalue = ',SXeigenval(1) + endif +END_PROVIDER + + BEGIN_PROVIDER [real*8, energy_improvement] + implicit none + if(state_following_casscf)then + energy_improvement = SXeigenval(best_vector_ovrlp_casscf) + else + energy_improvement = SXeigenval(1) + endif + END_PROVIDER + + + + BEGIN_PROVIDER [ integer, best_vector_ovrlp_casscf ] +&BEGIN_PROVIDER [ double precision, best_overlap_casscf ] + implicit none + integer :: i + double precision :: c0 + best_overlap_casscf = 0.D0 + best_vector_ovrlp_casscf = -1000 + do i=1,nMonoEx+1 + if (SXeigenval(i).lt.0.D0) then + if (dabs(SXeigenvec(1,i)).gt.best_overlap_casscf) then + best_overlap_casscf=dabs(SXeigenvec(1,i)) + best_vector_ovrlp_casscf = i + end if + end if + end do + if(best_vector_ovrlp_casscf.lt.0)then + best_vector_ovrlp_casscf = minloc(SXeigenval,nMonoEx+1) + endif + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + if (bavard) then + write(6,*) ' SXdiag : eigenvalue for best overlap with ' + write(6,*) ' previous orbitals = ',SXeigenval(best_vector_ovrlp_casscf) + write(6,*) ' weight of the 1st element ',c0 + endif + END_PROVIDER + + BEGIN_PROVIDER [double precision, SXvector, (nMonoEx+1)] + implicit none + BEGIN_DOC + ! Best eigenvector of the single-excitation matrix + END_DOC + integer :: i + double precision :: c0 + c0=SXeigenvec(1,best_vector_ovrlp_casscf) + do i=1,nMonoEx+1 + SXvector(i)=SXeigenvec(i,best_vector_ovrlp_casscf)/c0 + end do + END_PROVIDER + + +BEGIN_PROVIDER [double precision, NewOrbs, (ao_num,mo_num) ] + implicit none + BEGIN_DOC + ! Updated orbitals + END_DOC + integer :: i,j,ialph + + if(state_following_casscf)then + print*,'Using the state following casscf ' + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + else + if(best_vector_ovrlp_casscf.ne.1.and.n_orb_swap.ne.0)then + print*,'Taking the lowest root for the CASSCF' + print*,'!!! SWAPPING MOS !!!!!!' + level_shift_casscf *= 2.D0 + level_shift_casscf = min(level_shift_casscf,0.5d0) + print*,'level_shift_casscf = ',level_shift_casscf + NewOrbs = switch_mo_coef + !mo_coef = switch_mo_coef + !soft_touch mo_coef + !call save_mos_no_occ + !stop + else + level_shift_casscf *= 0.5D0 + level_shift_casscf = max(level_shift_casscf,0.002d0) + !touch level_shift_casscf + call dgemm('N','T', ao_num,mo_num,mo_num,1.d0, & + NatOrbsFCI, size(NatOrbsFCI,1), & + Umat, size(Umat,1), 0.d0, & + NewOrbs, size(NewOrbs,1)) + endif + endif + +END_PROVIDER + +BEGIN_PROVIDER [real*8, Umat, (mo_num,mo_num) ] + implicit none + BEGIN_DOC + ! Orbital rotation matrix + END_DOC + integer :: i,j,indx,k,iter,t,a,ii,tt,aa + logical :: converged + + real*8 :: Tpotmat (mo_num,mo_num), Tpotmat2 (mo_num,mo_num) + real*8 :: Tmat(mo_num,mo_num) + real*8 :: f + + ! the orbital rotation matrix T + Tmat(:,:)=0.D0 + indx=1 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx+=1 + Tmat(ii,tt)= SXvector(indx) + Tmat(tt,ii)=-SXvector(indx) + end do + end do + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(ii,aa)= SXvector(indx) + Tmat(aa,ii)=-SXvector(indx) + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx+=1 + Tmat(tt,aa)= SXvector(indx) + Tmat(aa,tt)=-SXvector(indx) + end do + end do + + ! Form the exponential + + Tpotmat(:,:)=0.D0 + Umat(:,:) =0.D0 + do i=1,mo_num + Tpotmat(i,i)=1.D0 + Umat(i,i) =1.d0 + end do + iter=0 + converged=.false. + do while (.not.converged) + iter+=1 + f = 1.d0 / dble(iter) + Tpotmat2(:,:) = Tpotmat(:,:) * f + call dgemm('N','N', mo_num,mo_num,mo_num,1.d0, & + Tpotmat2, size(Tpotmat2,1), & + Tmat, size(Tmat,1), 0.d0, & + Tpotmat, size(Tpotmat,1)) + Umat(:,:) = Umat(:,:) + Tpotmat(:,:) + + converged = ( sum(abs(Tpotmat(:,:))) < 1.d-6).or.(iter>30) + end do +END_PROVIDER + + + diff --git a/src/casscf_cipsi/reorder_orb.irp.f b/src/casscf_cipsi/reorder_orb.irp.f new file mode 100644 index 00000000..3cb90522 --- /dev/null +++ b/src/casscf_cipsi/reorder_orb.irp.f @@ -0,0 +1,70 @@ +subroutine reorder_orbitals_for_casscf + implicit none + BEGIN_DOC +! routine that reorders the orbitals of the CASSCF in terms block of core, active and virtual + END_DOC + integer :: i,j,iorb + integer, allocatable :: iorder(:),array(:) + allocate(iorder(mo_num),array(mo_num)) + do i = 1, n_core_orb + iorb = list_core(i) + array(iorb) = i + enddo + + do i = 1, n_inact_orb + iorb = list_inact(i) + array(iorb) = mo_num + i + enddo + + do i = 1, n_act_orb + iorb = list_act(i) + array(iorb) = 2 * mo_num + i + enddo + + do i = 1, n_virt_orb + iorb = list_virt(i) + array(iorb) = 3 * mo_num + i + enddo + + do i = 1, mo_num + iorder(i) = i + enddo + call isort(array,iorder,mo_num) + double precision, allocatable :: mo_coef_new(:,:) + allocate(mo_coef_new(ao_num,mo_num)) + do i = 1, mo_num + mo_coef_new(:,i) = mo_coef(:,iorder(i)) + enddo + mo_coef = mo_coef_new + touch mo_coef + + list_core_reverse = 0 + do i = 1, n_core_orb + list_core(i) = i + list_core_reverse(i) = i + mo_class(i) = "Core" + enddo + + list_inact_reverse = 0 + do i = 1, n_inact_orb + list_inact(i) = i + n_core_orb + list_inact_reverse(i+n_core_orb) = i + mo_class(i+n_core_orb) = "Inactive" + enddo + + list_act_reverse = 0 + do i = 1, n_act_orb + list_act(i) = n_core_inact_orb + i + list_act_reverse(n_core_inact_orb + i) = i + mo_class(n_core_inact_orb + i) = "Active" + enddo + + list_virt_reverse = 0 + do i = 1, n_virt_orb + list_virt(i) = n_core_inact_orb + n_act_orb + i + list_virt_reverse(n_core_inact_orb + n_act_orb + i) = i + mo_class(n_core_inact_orb + n_act_orb + i) = "Virtual" + enddo + touch list_core_reverse list_core list_inact list_inact_reverse list_act list_act_reverse list_virt list_virt_reverse + +end diff --git a/src/casscf_cipsi/save_energy.irp.f b/src/casscf_cipsi/save_energy.irp.f new file mode 100644 index 00000000..18750c3d --- /dev/null +++ b/src/casscf_cipsi/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_casscf_cipsi_energy(E(1:N_states)) + call ezfio_set_casscf_cipsi_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/casscf_cipsi/superci_dm.irp.f b/src/casscf_cipsi/superci_dm.irp.f new file mode 100644 index 00000000..ee831c35 --- /dev/null +++ b/src/casscf_cipsi/superci_dm.irp.f @@ -0,0 +1,207 @@ + BEGIN_PROVIDER [double precision, super_ci_dm, (mo_num,mo_num)] + implicit none + BEGIN_DOC +! density matrix of the super CI matrix, in the basis of NATURAL ORBITALS OF THE CASCI WF +! +! This is obtained from annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 +! +! WARNING ::: in the equation B3.d there is a TYPO with a forgotten MINUS SIGN (see variable mat_tmp_dm_super_ci ) + END_DOC + super_ci_dm = 0.d0 + integer :: i,j,iorb,jorb + integer :: a,aorb,b,borb + integer :: t,torb,v,vorb,u,uorb,x,xorb + double precision :: c0,ci + c0 = SXeigenvec(1,1) + ! equation B3.a of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! loop over the core/inact + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(iorb,iorb) = 2.d0 ! first term of B3.a + ! loop over the core/inact + do j = 1, n_core_inact_orb + jorb = list_core_inact(j) + ! loop over the virtual + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(jorb,iorb) += -2.d0 * lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,jorb) ! second term in B3.a + enddo + do t = 1, n_act_orb + torb = list_act(t) + ! thrid term of the B3.a + super_ci_dm(jorb,iorb) += - lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(jorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.b of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(iorb,torb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,iorb) = c0 * lowest_super_ci_coef_mo(torb,iorb) * (2.d0 - occ_act(t)) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(iorb,torb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,iorb) += - lowest_super_ci_coef_mo(aorb,iorb) * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + enddo + enddo + enddo + + ! equation B3.c of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,iorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + super_ci_dm(iorb,aorb) = 2.d0 * c0 * lowest_super_ci_coef_mo(aorb,iorb) + enddo + enddo + + ! equation B3.d of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(torb,torb) = occ_act(t) ! first term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + super_ci_dm(torb,torb) += - occ_act(x) * occ_act(t)* mat_tmp_dm_super_ci(x,x) ! second term involving the ONE-rdm + enddo + do u = 1, n_act_orb + uorb = list_act(u) + + ! second term of equation B3.d + do x = 1, n_act_orb + xorb = list_act(x) + do v = 1, n_act_orb + vorb = list_act(v) + super_ci_dm(torb,uorb) += 2.d0 * P0tuvx_no(v,x,t,u) * mat_tmp_dm_super_ci(v,x) ! second term involving the TWO-rdm + enddo + enddo + + ! third term of equation B3.d + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(torb,uorb) += lowest_super_ci_coef_mo(iorb,torb) * lowest_super_ci_coef_mo(iorb,uorb) * (2.d0 - occ_act(t) - occ_act(u)) + enddo + + enddo + enddo + + ! equation B3.e of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do t = 1, n_act_orb + torb = list_act(t) + do a = 1, n_virt_orb + aorb = list_virt(a) + super_ci_dm(aorb,torb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + super_ci_dm(torb,aorb) += c0 * lowest_super_ci_coef_mo(aorb,torb) * occ_act(t) + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(aorb,torb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + super_ci_dm(torb,aorb) += lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,torb) * (2.d0 - occ_act(t)) + enddo + enddo + enddo + + ! equation B3.f of the annex B of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do b = 1, n_virt_orb + borb= list_virt(b) + + ! First term of equation B3.f + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + super_ci_dm(borb,aorb) += 2.d0 * lowest_super_ci_coef_mo(iorb,aorb) * lowest_super_ci_coef_mo(iorb,borb) + enddo + + ! Second term of equation B3.f + do t = 1, n_act_orb + torb = list_act(t) + super_ci_dm(borb,aorb) += lowest_super_ci_coef_mo(torb,aorb) * lowest_super_ci_coef_mo(torb,borb) * occ_act(t) + enddo + enddo + enddo + + END_PROVIDER + + BEGIN_PROVIDER [double precision, superci_natorb, (ao_num,mo_num) +&BEGIN_PROVIDER [double precision, superci_nat_occ, (mo_num) + implicit none + call general_mo_coef_new_as_svd_vectors_of_mo_matrix_eig(super_ci_dm,mo_num,mo_num,mo_num,NatOrbsFCI,superci_nat_occ,superci_natorb) + +END_PROVIDER + + BEGIN_PROVIDER [double precision, mat_tmp_dm_super_ci, (n_act_orb,n_act_orb)] + implicit none + BEGIN_DOC + ! computation of the term in [ ] in the equation B3.d of Roos et. al. Chemical Physics 48 (1980) 157-173 + ! + ! !!!!! WARNING !!!!!! there is a TYPO: a MINUS SIGN SHOULD APPEAR in that term + END_DOC + integer :: a,aorb,i,iorb + integer :: x,xorb,v,vorb + mat_tmp_dm_super_ci = 0.d0 + do v = 1, n_act_orb + vorb = list_act(v) + do x = 1, n_act_orb + xorb = list_act(x) + do a = 1, n_virt_orb + aorb = list_virt(a) + mat_tmp_dm_super_ci(x,v) += lowest_super_ci_coef_mo(aorb,vorb) * lowest_super_ci_coef_mo(aorb,xorb) + enddo + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + ! MARK THE MINUS SIGN HERE !!!!!!!!!!! BECAUSE OF TYPO IN THE ORIGINAL PAPER + mat_tmp_dm_super_ci(x,v) -= lowest_super_ci_coef_mo(iorb,vorb) * lowest_super_ci_coef_mo(iorb,xorb) + enddo + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, lowest_super_ci_coef_mo, (mo_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + integer :: a, aorb,t, torb + double precision :: sqrt2 + + sqrt2 = 1.d0/dsqrt(2.d0) + do i = 1, nMonoEx + iorb = excit(1,i) + jorb = excit(2,i) + lowest_super_ci_coef_mo(iorb,jorb) = SXeigenvec(i+1,1) + lowest_super_ci_coef_mo(jorb,iorb) = SXeigenvec(i+1,1) + enddo + + ! a_{it} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,iorb) *= (2.d0 - occ_act(t))**(-0.5d0) + lowest_super_ci_coef_mo(iorb,torb) *= (2.d0 - occ_act(t))**(-0.5d0) + enddo + enddo + + ! a_{ia} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do i = 1, n_core_inact_orb + iorb = list_core_inact(i) + do a = 1, n_virt_orb + aorb = list_virt(a) + lowest_super_ci_coef_mo(aorb,iorb) *= sqrt2 + lowest_super_ci_coef_mo(iorb,aorb) *= sqrt2 + enddo + enddo + + ! a_{ta} of the equation B.2 of Roos et. al. Chemical Physics 48 (1980) 157-173 + do a = 1, n_virt_orb + aorb = list_virt(a) + do t = 1, n_act_orb + torb = list_act(t) + lowest_super_ci_coef_mo(torb,aorb) *= occ_act(t)**(-0.5d0) + lowest_super_ci_coef_mo(aorb,torb) *= occ_act(t)**(-0.5d0) + enddo + enddo + + END_PROVIDER + diff --git a/src/casscf_cipsi/swap_orb.irp.f b/src/casscf_cipsi/swap_orb.irp.f new file mode 100644 index 00000000..49af207c --- /dev/null +++ b/src/casscf_cipsi/swap_orb.irp.f @@ -0,0 +1,132 @@ + BEGIN_PROVIDER [double precision, SXvector_lowest, (nMonoEx)] + implicit none + integer :: i + do i=2,nMonoEx+1 + SXvector_lowest(i-1)=SXeigenvec(i,1) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, thresh_overlap_switch] + implicit none + thresh_overlap_switch = 0.5d0 + END_PROVIDER + + BEGIN_PROVIDER [integer, max_overlap, (nMonoEx)] +&BEGIN_PROVIDER [integer, n_max_overlap] +&BEGIN_PROVIDER [integer, dim_n_max_overlap] + implicit none + double precision, allocatable :: vec_tmp(:) + integer, allocatable :: iorder(:) + allocate(vec_tmp(nMonoEx),iorder(nMonoEx)) + integer :: i + do i = 1, nMonoEx + iorder(i) = i + vec_tmp(i) = -dabs(SXvector_lowest(i)) + enddo + call dsort(vec_tmp,iorder,nMonoEx) + n_max_overlap = 0 + do i = 1, nMonoEx + if(dabs(vec_tmp(i)).gt.thresh_overlap_switch)then + n_max_overlap += 1 + max_overlap(n_max_overlap) = iorder(i) + endif + enddo + dim_n_max_overlap = max(1,n_max_overlap) + END_PROVIDER + + BEGIN_PROVIDER [integer, orb_swap, (2,dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, index_orb_swap, (dim_n_max_overlap)] +&BEGIN_PROVIDER [integer, n_orb_swap ] + implicit none + use bitmasks ! you need to include the bitmasks_module.f90 features + integer :: i,imono,iorb,jorb,j + n_orb_swap = 0 + do i = 1, n_max_overlap + imono = max_overlap(i) + iorb = excit(1,imono) + jorb = excit(2,imono) + if (excit_class(imono) == "c-a" .and.hessmat(imono,imono).gt.0.d0)then ! core --> active rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = iorb ! core + orb_swap(2,n_orb_swap) = jorb ! active + index_orb_swap(n_orb_swap) = imono + else if (excit_class(imono) == "a-v" .and.hessmat(imono,imono).gt.0.d0)then ! active --> virtual rotation + n_orb_swap += 1 + orb_swap(1,n_orb_swap) = jorb ! virtual + orb_swap(2,n_orb_swap) = iorb ! active + index_orb_swap(n_orb_swap) = imono + endif + enddo + + integer,allocatable :: orb_swap_tmp(:,:) + allocate(orb_swap_tmp(2,dim_n_max_overlap)) + do i = 1, n_orb_swap + orb_swap_tmp(1,i) = orb_swap(1,i) + orb_swap_tmp(2,i) = orb_swap(2,i) + enddo + + integer(bit_kind), allocatable :: det_i(:),det_j(:) + allocate(det_i(N_int),det_j(N_int)) + logical, allocatable :: good_orb_rot(:) + allocate(good_orb_rot(n_orb_swap)) + integer, allocatable :: index_orb_swap_tmp(:) + allocate(index_orb_swap_tmp(dim_n_max_overlap)) + index_orb_swap_tmp = index_orb_swap + good_orb_rot = .True. + integer :: icount,k + do i = 1, n_orb_swap + if(.not.good_orb_rot(i))cycle + det_i = 0_bit_kind + call set_bit_to_integer(orb_swap(1,i),det_i,N_int) + call set_bit_to_integer(orb_swap(2,i),det_i,N_int) + do j = i+1, n_orb_swap + det_j = 0_bit_kind + call set_bit_to_integer(orb_swap(1,j),det_j,N_int) + call set_bit_to_integer(orb_swap(2,j),det_j,N_int) + icount = 0 + do k = 1, N_int + icount += popcnt(ior(det_i(k),det_j(k))) + enddo + if (icount.ne.4)then + good_orb_rot(i) = .False. + good_orb_rot(j) = .False. + exit + endif + enddo + enddo + icount = n_orb_swap + n_orb_swap = 0 + do i = 1, icount + if(good_orb_rot(i))then + n_orb_swap += 1 + index_orb_swap(n_orb_swap) = index_orb_swap_tmp(i) + orb_swap(1,n_orb_swap) = orb_swap_tmp(1,i) + orb_swap(2,n_orb_swap) = orb_swap_tmp(2,i) + endif + enddo + + if(n_orb_swap.gt.0)then + print*,'n_orb_swap = ',n_orb_swap + endif + do i = 1, n_orb_swap + print*,'imono = ',index_orb_swap(i) + print*,orb_swap(1,i),'-->',orb_swap(2,i) + enddo + END_PROVIDER + + BEGIN_PROVIDER [double precision, switch_mo_coef, (ao_num,mo_num)] + implicit none + integer :: i,j,iorb,jorb + switch_mo_coef = NatOrbsFCI + do i = 1, n_orb_swap + iorb = orb_swap(1,i) + jorb = orb_swap(2,i) + do j = 1, ao_num + switch_mo_coef(j,jorb) = NatOrbsFCI(j,iorb) + enddo + do j = 1, ao_num + switch_mo_coef(j,iorb) = NatOrbsFCI(j,jorb) + enddo + enddo + + END_PROVIDER diff --git a/src/casscf_cipsi/tot_en.irp.f b/src/casscf_cipsi/tot_en.irp.f new file mode 100644 index 00000000..1d70e087 --- /dev/null +++ b/src/casscf_cipsi/tot_en.irp.f @@ -0,0 +1,101 @@ + BEGIN_PROVIDER [real*8, etwo] +&BEGIN_PROVIDER [real*8, eone] +&BEGIN_PROVIDER [real*8, eone_bis] +&BEGIN_PROVIDER [real*8, etwo_bis] +&BEGIN_PROVIDER [real*8, etwo_ter] +&BEGIN_PROVIDER [real*8, ecore] +&BEGIN_PROVIDER [real*8, ecore_bis] + implicit none + integer :: t,u,v,x,i,ii,tt,uu,vv,xx,j,jj,t3,u3,v3,x3 + real*8 :: e_one_all,e_two_all + e_one_all=0.D0 + e_two_all=0.D0 + do i=1,n_core_inact_orb + ii=list_core_inact(i) + e_one_all+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + e_two_all+=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + end do + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + e_two_all+=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + end do + end do + end do + do t=1,n_act_orb + tt=list_act(t) + do u=1,n_act_orb + uu=list_act(u) + e_one_all+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do v=1,n_act_orb + v3=v+n_core_inact_orb + do x=1,n_act_orb + x3=x+n_core_inact_orb + e_two_all +=P0tuvx(t,u,v,x)*bielec_PQxx(tt,uu,v3,x3) + end do + end do + end do + end do + ecore =nuclear_repulsion + ecore_bis=nuclear_repulsion + do i=1,n_core_inact_orb + ii=list_core_inact(i) + ecore +=2.D0*mo_one_e_integrals(ii,ii) + ecore_bis+=2.D0*mo_one_e_integrals(ii,ii) + do j=1,n_core_inact_orb + jj=list_core_inact(j) + ecore +=2.D0*bielec_PQxx(ii,ii,j,j)-bielec_PQxx(ii,jj,j,i) + ecore_bis+=2.D0*bielec_PxxQ(ii,i,j,jj)-bielec_PxxQ(ii,j,j,ii) + end do + end do + eone =0.D0 + eone_bis=0.D0 + etwo =0.D0 + etwo_bis=0.D0 + etwo_ter=0.D0 + do t=1,n_act_orb + tt=list_act(t) + t3=t+n_core_inact_orb + do u=1,n_act_orb + uu=list_act(u) + u3=u+n_core_inact_orb + eone +=D0tu(t,u)*mo_one_e_integrals(tt,uu) + eone_bis+=D0tu(t,u)*mo_one_e_integrals(tt,uu) + do i=1,n_core_inact_orb + ii=list_core_inact(i) + eone +=D0tu(t,u)*(2.D0*bielec_PQxx(tt,uu,i,i) & + -bielec_PQxx(tt,ii,i,u3)) + eone_bis+=D0tu(t,u)*(2.D0*bielec_PxxQ(tt,u3,i,ii) & + -bielec_PxxQ(tt,i,i,uu)) + end do + do v=1,n_act_orb + vv=list_act(v) + v3=v+n_core_inact_orb + do x=1,n_act_orb + xx=list_act(x) + x3=x+n_core_inact_orb + real*8 :: h1,h2,h3 + h1=bielec_PQxx(tt,uu,v3,x3) + h2=bielec_PxxQ(tt,u3,v3,xx) + h3=bielecCI(t,u,v,xx) + etwo +=P0tuvx(t,u,v,x)*h1 + etwo_bis+=P0tuvx(t,u,v,x)*h2 + etwo_ter+=P0tuvx(t,u,v,x)*h3 + if ((h1.ne.h2).or.(h1.ne.h3)) then + write(6,9901) t,u,v,x,h1,h2,h3 + 9901 format('aie: ',4I4,3E20.12) + end if + end do + end do + end do + end do + +END_PROVIDER + + diff --git a/src/casscf_tc_bi/det_manip.irp.f b/src/casscf_tc_bi/det_manip.irp.f new file mode 100644 index 00000000..d8c309a4 --- /dev/null +++ b/src/casscf_tc_bi/det_manip.irp.f @@ -0,0 +1,125 @@ +use bitmasks + +subroutine do_signed_mono_excitation(key1,key2,nu,ihole,ipart, & + ispin,phase,ierr) + BEGIN_DOC + ! we create the mono-excitation, and determine, if possible, + ! the phase and the number in the list of determinants + END_DOC + implicit none + integer(bit_kind) :: key1(N_int,2),key2(N_int,2) + integer(bit_kind), allocatable :: keytmp(:,:) + integer :: exc(0:2,2,2),ihole,ipart,ierr,nu,ispin + real*8 :: phase + logical :: found + allocate(keytmp(N_int,2)) + + nu=-1 + phase=1.D0 + ierr=0 + call det_copy(key1,key2,N_int) + ! write(6,*) ' key2 before excitation ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + call do_single_excitation(key2,ihole,ipart,ispin,ierr) + ! write(6,*) ' key2 after ',ihole,' -> ',ipart,' spin = ',ispin + ! call print_det(key2,N_int) + ! write(6,*) ' excitation ',ihole,' -> ',ipart,' gives ierr = ',ierr + if (ierr.eq.1) then + ! excitation is possible + ! get the phase + call get_single_excitation(key1,key2,exc,phase,N_int) + ! get the number in the list + found=.false. + nu=0 + + !TODO BOTTLENECK + do while (.not.found) + nu+=1 + if (nu.gt.N_det) then + ! the determinant is possible, but not in the list + found=.true. + nu=-1 + else + call det_extract(keytmp,nu,N_int) + integer :: i,ii + found=.true. + do ii=1,2 + do i=1,N_int + if (keytmp(i,ii).ne.key2(i,ii)) then + found=.false. + end if + end do + end do + end if + end do + end if + ! + ! we found the new string, the phase, and possibly the number in the list + ! +end subroutine do_signed_mono_excitation + +subroutine det_extract(key,nu,Nint) + BEGIN_DOC + ! extract a determinant from the list of determinants + END_DOC + implicit none + integer :: ispin,i,nu,Nint + integer(bit_kind) :: key(Nint,2) + do ispin=1,2 + do i=1,Nint + key(i,ispin)=psi_det(i,ispin,nu) + end do + end do +end subroutine det_extract + +subroutine det_copy(key1,key2,Nint) + use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC + ! copy a determinant from key1 to key2 + END_DOC + implicit none + integer :: ispin,i,Nint + integer(bit_kind) :: key1(Nint,2),key2(Nint,2) + do ispin=1,2 + do i=1,Nint + key2(i,ispin)=key1(i,ispin) + end do + end do +end subroutine det_copy + +subroutine do_spinfree_mono_excitation(key_in,key_out1,key_out2 & + ,nu1,nu2,ihole,ipart,phase1,phase2,ierr,jerr) + BEGIN_DOC + ! we create the spin-free mono-excitation E_pq=(a^+_p a_q + a^+_P a_Q) + ! we may create two determinants as result + ! + END_DOC + implicit none + integer(bit_kind) :: key_in(N_int,2),key_out1(N_int,2) + integer(bit_kind) :: key_out2(N_int,2) + integer :: ihole,ipart,ierr,jerr,nu1,nu2 + integer :: ispin + real*8 :: phase1,phase2 + + ! write(6,*) ' applying E_',ipart,ihole,' on determinant ' + ! call print_det(key_in,N_int) + + ! spin alpha + ispin=1 + call do_signed_mono_excitation(key_in,key_out1,nu1,ihole & + ,ipart,ispin,phase1,ierr) + ! if (ierr.eq.1) then + ! write(6,*) ' 1 result is ',nu1,phase1 + ! call print_det(key_out1,N_int) + ! end if + ! spin beta + ispin=2 + call do_signed_mono_excitation(key_in,key_out2,nu2,ihole & + ,ipart,ispin,phase2,jerr) + ! if (jerr.eq.1) then + ! write(6,*) ' 2 result is ',nu2,phase2 + ! call print_det(key_out2,N_int) + ! end if + +end subroutine do_spinfree_mono_excitation + diff --git a/src/casscf_tc_bi/grad_dm.irp.f b/src/casscf_tc_bi/grad_dm.irp.f new file mode 100644 index 00000000..047b5718 --- /dev/null +++ b/src/casscf_tc_bi/grad_dm.irp.f @@ -0,0 +1,263 @@ + BEGIN_PROVIDER [real*8, gradvec_tc_r, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_tc_l, (0:3,nMonoEx)] + BEGIN_DOC +! gradvec_tc_r(0:3,i) = +! +! gradvec_tc_l(0:3,i) = +! +! where the indices "i" corresponds to E_q^p(i) +! +! i = mat_idx_c_a(q,p) +! +! and gradvec_tc_r/l(0) = full matrix element +! +! gradvec_tc_r/l(1) = one-body part + +! gradvec_tc_r/l(2) = two-body part + +! gradvec_tc_r/l(3) = three-body part + END_DOC + implicit none + integer :: ii,tt,aa,indx + integer :: i,t,a,fff + double precision :: res_l(0:3), res_r(0:3) + gradvec_tc_l = 0.d0 + gradvec_tc_r = 0.d0 + ! computing the core/inactive --> virtual orbitals gradients + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do t=1,n_act_orb + tt=list_act(t) + indx = mat_idx_c_a(i,t) + call gradvec_tc_it(ii,tt,res_l,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + + do i=1,n_core_inact_orb + ii=list_core_inact(i) + do a=1,n_virt_orb + indx = mat_idx_c_v(i,a) + aa=list_virt(a) + call gradvec_tc_ia(ii,aa,res_l,res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do + +! print*,'DM grad' + do t=1,n_act_orb + tt=list_act(t) + do a=1,n_virt_orb + aa=list_virt(a) + indx = mat_idx_a_v(t,a) +! print*,indx,t,a + call gradvec_tc_ta(tt,aa,res_l, res_r) + do fff = 0,3 + gradvec_tc_l(fff,indx)=res_l(fff) + gradvec_tc_r(fff,indx)=res_r(fff) + enddo + end do + end do +END_PROVIDER + +subroutine gradvec_tc_ia(i,a,res_l, res_r) + implicit none + BEGIN_DOC +! doubly occupied --> virtual TC gradient +! +! Corresponds to res_r = , +! +! res_l = + END_DOC + integer, intent(in) :: i,a + double precision, intent(out) :: res_l(0:3), res_r(0:3) + res_l = 0.d0 + res_r = 0.d0 + res_l(1) = -2 * mo_bi_ortho_tc_one_e(a,i) + res_r(1) = -2 * mo_bi_ortho_tc_one_e(i,a) + integer :: j,t,r,jj,tt,rr + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,i,j,a) - mo_bi_ortho_tc_two_e(i,j,j,a)) + res_l(2) -= -2.d0 * ( 2.d0 * mo_bi_ortho_tc_two_e(j,a,j,i) - mo_bi_ortho_tc_two_e(j,a,i,j)) + enddo + do tt = 1, n_act_orb + t = list_act(tt) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,i,t,a) - mo_bi_ortho_tc_two_e(i,r,t,a)) & + +tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,i,r,a) - mo_bi_ortho_tc_two_e(i,t,r,a)) & + ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(t,r,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(t,a,r,i) - mo_bi_ortho_tc_two_e(t,a,i,r)) & + +tc_transition_matrix_mo(r,t,1,1) *(2.d0 * mo_bi_ortho_tc_two_e(r,a,t,i) - mo_bi_ortho_tc_two_e(r,a,i,t)) & + ) + enddo + enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) +end + +subroutine gradvec_tc_it(i,t,res_l, res_r) + implicit none + BEGIN_DOC +! doubly occupied --> active TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: i,t + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,j,jj,u,uu,v,vv + res_r = 0.d0 + res_l = 0.d0 + res_r(1) += -2.d0 * mo_bi_ortho_tc_one_e(i,t) + res_l(1) -= -2.D0 * mo_bi_ortho_tc_one_e(t,i) + + do rr = 1, n_act_orb + r = list_act(rr) + res_r(1) += mo_bi_ortho_tc_one_e(i,r) * tc_transition_matrix_mo(t,r,1,1) + res_l(1) -= mo_bi_ortho_tc_one_e(r,i) * tc_transition_matrix_mo(r,t,1,1) + enddo + + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r(2) += -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(i,j,t,j) - mo_bi_ortho_tc_two_e(j,i,t,j)) + res_l(2) -= -2.d0 * (2d0 * mo_bi_ortho_tc_two_e(t,j,i,j) - mo_bi_ortho_tc_two_e(t,j,j,i)) + do rr = 1, n_act_orb + r = list_act(rr) + res_r(2) += tc_transition_matrix_mo(t,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,j,r,j) - mo_bi_ortho_tc_two_e(i,j,j,r)) + res_l(2) -= tc_transition_matrix_mo(r,t,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,j,i,j) - mo_bi_ortho_tc_two_e(j,r,j,i)) + enddo + enddo + do rr = 1, n_act_orb + r = list_act(rr) + do uu = 1, n_act_orb + u = list_act(uu) + res_r(2) += -0.5d0 * ( & + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(u,i,r,t) - mo_bi_ortho_tc_two_e(u,i,t,r)) & + + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(i,r,t,u) - mo_bi_ortho_tc_two_e(i,r,u,t)) & + ) + res_l(2) -= -0.5d0 * ( & + tc_transition_matrix_mo(r,u,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(r,t,u,i) - mo_bi_ortho_tc_two_e(t,r,u,i)) & + + tc_transition_matrix_mo(u,r,1,1) * (2.d0 * mo_bi_ortho_tc_two_e(t,u,i,r) - mo_bi_ortho_tc_two_e(u,t,i,r)) & + ) + do vv = 1, n_act_orb + v = list_act(vv) + res_r(2) += 0.5d0 * ( & + mo_bi_ortho_tc_two_e(i,r,v,u) * tc_two_rdm(t,r,v,u) + mo_bi_ortho_tc_two_e(r,i,v,u) * tc_two_rdm(r,t,v,u) ) + res_l(2) -= 0.5d0 * ( & + mo_bi_ortho_tc_two_e(v,u,i,r) * tc_two_rdm(v,u,t,r) + mo_bi_ortho_tc_two_e(v,u,r,i) * tc_two_rdm(v,u,r,t) ) + enddo + enddo + enddo + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) +end + +subroutine gradvec_tc_ta(t,a,res_l, res_r) + implicit none + BEGIN_DOC +! active --> virtual TC gradient +! +! Corresponds to res_r = +! +! res_l = + END_DOC + integer, intent(in) :: t,a + double precision, intent(out) :: res_l(0:3),res_r(0:3) + integer :: rr,r,j,jj,u,uu,v,vv + double precision :: res_r_inact_test, res_r_act_test + double precision :: res_l_inact_test, res_l_act_test + res_r = 0.d0 + res_l = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + res_l(1) += mo_bi_ortho_tc_one_e(a,r) * tc_transition_matrix_mo(t,r,1,1) + res_r(1) -= mo_bi_ortho_tc_one_e(r,a) * tc_transition_matrix_mo(r,t,1,1) + enddo + + res_r_inact_test = 0.d0 + res_l_inact_test = 0.d0 + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + do rr = 1, n_act_orb + r = list_act(rr) + res_r_inact_test += -tc_transition_matrix_mo(r,t,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(r,j,a,j) - mo_bi_ortho_tc_two_e(r,j,j,a)) + res_l_inact_test -= -tc_transition_matrix_mo(t,r,1,1) * & + (2.d0 * mo_bi_ortho_tc_two_e(a,j,r,j) - mo_bi_ortho_tc_two_e(j,a,r,j)) + enddo + enddo + res_r_act_test = 0.d0 + res_l_act_test = 0.d0 + do rr = 1, n_act_orb + r = list_act(rr) + do vv = 1, n_act_orb + v = list_act(vv) + do uu = 1, n_act_orb + u = list_act(uu) + res_r_act_test += - (mo_bi_ortho_tc_two_e(v,r,u,a) * tc_two_rdm(r,v,t,u) & + +mo_bi_ortho_tc_two_e(v,r,a,u) * tc_two_rdm(r,v,u,t)) + res_l_act_test -= - (mo_bi_ortho_tc_two_e(u,a,v,r) * tc_two_rdm(t,u,r,v) & + +mo_bi_ortho_tc_two_e(a,u,v,r) * tc_two_rdm(u,t,r,v)) + enddo + enddo + enddo + res_r_act_test *= 0.5d0 + res_l_act_test *= 0.5d0 + res_r(2) = res_r_inact_test + res_r_act_test + res_l(2) = res_l_inact_test + res_l_act_test + + integer :: m,x,y + double precision :: res_r_inact, res_r_act + if(.False.)then + ! test quantities + res_r_inact = 0.d0 + res_r_act = 0.d0 + do m = 1, mo_num + do x = 1, mo_num + do jj = 1, n_core_inact_orb + j = list_core_inact(jj) + res_r_inact += 0.5d0 * mo_bi_ortho_tc_two_e(t,j,m,x) * tc_two_rdm(a,j,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,j,a,x) * tc_two_rdm(m,j,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(j,t,m,x) * tc_two_rdm(j,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,j,m,a) * tc_two_rdm(x,j,m,t) + enddo + do rr = 1, n_act_orb + r = list_act(rr) + res_r_act += 0.5d0 * mo_bi_ortho_tc_two_e(t,r,m,x) * tc_two_rdm(a,r,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(m,r,a,x) * tc_two_rdm(m,r,t,x) & + +0.5d0 * mo_bi_ortho_tc_two_e(r,t,m,x) * tc_two_rdm(r,a,m,x) & + -0.5d0 * mo_bi_ortho_tc_two_e(x,r,m,a) * tc_two_rdm(x,r,m,t) + enddo + enddo + enddo + if(dabs(res_r_inact).gt.1.d-12)then + if(dabs(res_r_inact_test - res_r_inact).gt.1.d-10)then + print*,'inact' + print*,'t,a',t,a + print*,res_r_inact_test , res_r_inact, dabs(res_r_inact_test - res_r_inact) + endif + endif + if(dabs(res_r_act).gt.1.d-12)then + if(dabs(res_r_act_test - res_r_act).gt.1.d-10)then + print*,'act' + print*,'t,a',t,a + print*,res_r_act_test , res_r_act, dabs(res_r_act_test - res_r_act) + endif + endif + endif + + res_r(0) = res_r(1) + res_r(2) + res_r(3) + res_l(0) = res_l(1) + res_l(2) + res_l(3) + +end diff --git a/src/casscf_tc_bi/grad_old.irp.f b/src/casscf_tc_bi/grad_old.irp.f new file mode 100644 index 00000000..3f0ffb5e --- /dev/null +++ b/src/casscf_tc_bi/grad_old.irp.f @@ -0,0 +1,134 @@ + + BEGIN_PROVIDER [real*8, gradvec_detail_right_old, (0:3,nMonoEx)] +&BEGIN_PROVIDER [real*8, gradvec_detail_left_old, (0:3,nMonoEx)] + BEGIN_DOC + ! calculate the orbital gradient by hand, i.e. for + ! each determinant I we determine the string E_pq |I> (alpha and beta + ! separately) and generate + ! sum_I c_I is then the pq component of the orbital + ! gradient + ! E_pq = a^+_pa_q + a^+_Pa_Q + END_DOC + implicit none + integer :: ii,tt,aa,indx,ihole,ipart,istate,ll + real*8 :: res_l(0:3), res_r(0:3) + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_c_v(ii,aa) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + + do ii = 1, n_core_inact_orb + ihole = list_core_inact(ii) + do tt = 1, n_act_orb + ipart = list_act(tt) + indx = mat_idx_c_a(ii,tt) + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + +! print*,'old grad' + do tt = 1, n_act_orb + ihole = list_act(tt) + do aa = 1, n_virt_orb + ipart = list_virt(aa) + indx = mat_idx_a_v(tt,aa) +! print*,indx,tt,aa + call calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + do ll = 0, 3 + gradvec_detail_left_old (ll,indx)=res_l(ll) + gradvec_detail_right_old(ll,indx)=res_r(ll) + enddo + enddo + enddo + + real*8 :: norm_grad_left, norm_grad_right + norm_grad_left=0.d0 + norm_grad_right=0.d0 + do indx=1,nMonoEx + norm_grad_left+=gradvec_detail_left_old(0,indx)*gradvec_detail_left_old(0,indx) + norm_grad_right+=gradvec_detail_right_old(0,indx)*gradvec_detail_right_old(0,indx) + end do + norm_grad_left=sqrt(norm_grad_left) + norm_grad_right=sqrt(norm_grad_right) +! if (bavard) then + write(6,*) + write(6,*) ' Norm of the LEFT orbital gradient (via <0|EH|0>) : ', norm_grad_left + write(6,*) ' Norm of the RIGHT orbital gradient (via <0|HE|0>) : ', norm_grad_right + write(6,*) +! endif + + +END_PROVIDER + +subroutine calc_grad_elem_h_tc(ihole,ipart,res_l, res_r) + BEGIN_DOC + ! Computes the gradient with respect to orbital rotation BRUT FORCE + ! + ! res_l = + ! + ! res_r = + ! + ! q=hole, p=particle. NOTE that on res_l it is E_qp and on res_r it is E_pq + ! + ! res_l(0) = total matrix element, res_l(1) = one-electron part, + ! + ! res_l(2) = two-electron part, res_l(3) = three-electron part + ! + END_DOC + implicit none + integer, intent(in) :: ihole,ipart + double precision, intent(out) :: res_l(0:3), res_r(0:3) + integer :: mu,iii,ispin,ierr,nu,istate,ll + integer(bit_kind), allocatable :: det_mu(:,:),det_mu_ex(:,:) + real*8 :: chi_H_mu_ex_array(0:3,N_states),mu_ex_H_phi_array(0:3,N_states),phase + allocate(det_mu(N_int,2)) + allocate(det_mu_ex(N_int,2)) + + res_l=0.D0 + res_r=0.D0 + + do mu=1,n_det + ! get the string of the determinant |mu> + call det_extract(det_mu,mu,N_int) + do ispin=1,2 + ! do the monoexcitation on it: |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> + call det_copy(det_mu,det_mu_ex,N_int) + call do_signed_mono_excitation(det_mu,det_mu_ex,nu & + ,ihole,ipart,ispin,phase,ierr) + ! |det_mu_ex> = a^dagger_{p,ispin} a_{q,ispin} |mu> + if (ierr.eq.1) then + call i_H_tc_psi_phi(det_mu_ex,psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_int & + ,N_det,psi_det_size,N_states,chi_H_mu_ex_array,mu_ex_H_phi_array) + ! chi_H_mu_ex_array = + ! mu_ex_H_phi_array = + do istate=1,N_states + do ll = 0,3 ! loop over the body components (1e,2e,3e) + !res_l = \sum_mu c_mu^l = + res_l(ll)+= mu_ex_H_phi_array(ll,istate)*psi_l_coef_bi_ortho(mu,istate)*phase + !res_r = \sum_mu c_mu^r = + res_r(ll)+= chi_H_mu_ex_array(ll,istate)*psi_r_coef_bi_ortho(mu,istate)*phase + enddo + end do + end if + end do + end do + + ! state-averaged gradient + res_l*=1.d0/dble(N_states) + res_r*=1.d0/dble(N_states) + +end + diff --git a/src/casscf_tc_bi/gradient.irp.f b/src/casscf_tc_bi/gradient.irp.f new file mode 100644 index 00000000..630bd891 --- /dev/null +++ b/src/casscf_tc_bi/gradient.irp.f @@ -0,0 +1,94 @@ +use bitmasks + +BEGIN_PROVIDER [ integer, nMonoEx ] + BEGIN_DOC + ! Number of single excitations + END_DOC + implicit none + nMonoEx=n_core_inact_orb*n_act_orb+n_core_inact_orb*n_virt_orb+n_act_orb*n_virt_orb +END_PROVIDER + + BEGIN_PROVIDER [integer, n_c_a_prov] +&BEGIN_PROVIDER [integer, n_c_v_prov] +&BEGIN_PROVIDER [integer, n_a_v_prov] + implicit none + n_c_a_prov = n_core_inact_orb * n_act_orb + n_c_v_prov = n_core_inact_orb * n_virt_orb + n_a_v_prov = n_act_orb * n_virt_orb + END_PROVIDER + + BEGIN_PROVIDER [integer, excit, (2,nMonoEx)] +&BEGIN_PROVIDER [character*3, excit_class, (nMonoEx)] +&BEGIN_PROVIDER [integer, list_idx_c_a, (3,n_c_a_prov) ] +&BEGIN_PROVIDER [integer, list_idx_c_v, (3,n_c_v_prov) ] +&BEGIN_PROVIDER [integer, list_idx_a_v, (3,n_a_v_prov) ] +&BEGIN_PROVIDER [integer, mat_idx_c_a, (n_core_inact_orb,n_act_orb) +&BEGIN_PROVIDER [integer, mat_idx_c_v, (n_core_inact_orb,n_virt_orb) +&BEGIN_PROVIDER [integer, mat_idx_a_v, (n_act_orb,n_virt_orb) + BEGIN_DOC + ! a list of the orbitals involved in the excitation + END_DOC + + implicit none + integer :: i,t,a,ii,tt,aa,indx,indx_tmp + indx=0 + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do tt=1,n_act_orb + t=list_act(tt) + indx+=1 + excit(1,indx)=i + excit(2,indx)=t + excit_class(indx)='c-a' + indx_tmp += 1 + list_idx_c_a(1,indx_tmp) = indx + list_idx_c_a(2,indx_tmp) = ii + list_idx_c_a(3,indx_tmp) = tt + mat_idx_c_a(ii,tt) = indx + end do + end do + + indx_tmp = 0 + do ii=1,n_core_inact_orb + i=list_core_inact(ii) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=i + excit(2,indx)=a + excit_class(indx)='c-v' + indx_tmp += 1 + list_idx_c_v(1,indx_tmp) = indx + list_idx_c_v(2,indx_tmp) = ii + list_idx_c_v(3,indx_tmp) = aa + mat_idx_c_v(ii,aa) = indx + end do + end do + + indx_tmp = 0 + do tt=1,n_act_orb + t=list_act(tt) + do aa=1,n_virt_orb + a=list_virt(aa) + indx+=1 + excit(1,indx)=t + excit(2,indx)=a + excit_class(indx)='a-v' + indx_tmp += 1 + list_idx_a_v(1,indx_tmp) = indx + list_idx_a_v(2,indx_tmp) = tt + list_idx_a_v(3,indx_tmp) = aa + mat_idx_a_v(tt,aa) = indx + end do + end do + +! if (bavard) then + write(6,*) ' Filled the table of the Monoexcitations ' + do indx=1,nMonoEx + write(6,*) ' ex ',indx,' : ',excit(1,indx),' -> ' & + ,excit(2,indx),' ',excit_class(indx) + end do +! end if + +END_PROVIDER diff --git a/src/casscf_tc_bi/test_tc_casscf.irp.f b/src/casscf_tc_bi/test_tc_casscf.irp.f new file mode 100644 index 00000000..baa50c0f --- /dev/null +++ b/src/casscf_tc_bi/test_tc_casscf.irp.f @@ -0,0 +1,252 @@ +program tc_bi_ortho + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. + touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + +! call routine_i_h_psi +! call routine_grad + call routine_grad_num_dm_one_body +end + +subroutine routine_i_h_psi + implicit none + integer :: i,j + double precision :: i_H_chi_array(0:3,N_states),i_H_phi_array(0:3,N_states) + double precision :: hmono, htwoe, hthree, htot + double precision :: accu_l_hmono, accu_l_htwoe, accu_l_hthree, accu_l_htot + double precision :: accu_r_hmono, accu_r_htwoe, accu_r_hthree, accu_r_htot + double precision :: test_l_hmono, test_l_htwoe, test_l_hthree, test_l_htot + double precision :: test_r_hmono, test_r_htwoe, test_r_hthree, test_r_htot + + test_l_hmono = 0.d0 + test_l_htwoe = 0.d0 + test_l_hthree= 0.d0 + test_l_htot = 0.d0 + test_r_hmono = 0.d0 + test_r_htwoe = 0.d0 + test_r_hthree= 0.d0 + test_r_htot = 0.d0 + + do i = 1, N_det + call i_H_tc_psi_phi(psi_det(1,1,i),psi_det,psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,& + N_int,N_det,N_det,N_states,i_H_chi_array,i_H_phi_array) + accu_l_hmono = 0.d0 + accu_l_htwoe = 0.d0 + accu_l_hthree= 0.d0 + accu_l_htot = 0.d0 + accu_r_hmono = 0.d0 + accu_r_htwoe = 0.d0 + accu_r_hthree= 0.d0 + accu_r_htot = 0.d0 + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + accu_l_hmono += psi_l_coef_bi_ortho(j,1) * hmono + accu_l_htwoe += psi_l_coef_bi_ortho(j,1) * htwoe + accu_l_hthree += psi_l_coef_bi_ortho(j,1) * hthree + accu_l_htot += psi_l_coef_bi_ortho(j,1) * htot + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + accu_r_hmono += psi_r_coef_bi_ortho(j,1) * hmono + accu_r_htwoe += psi_r_coef_bi_ortho(j,1) * htwoe + accu_r_hthree += psi_r_coef_bi_ortho(j,1) * hthree + accu_r_htot += psi_r_coef_bi_ortho(j,1) * htot + enddo + test_l_htot += dabs(i_H_chi_array(0,1)-accu_l_htot) + test_l_hmono += dabs(i_H_chi_array(1,1)-accu_l_hmono) + test_l_htwoe += dabs(i_H_chi_array(2,1)-accu_l_htwoe) + test_l_hthree += dabs(i_H_chi_array(3,1)-accu_l_hthree) + + test_r_htot += dabs(i_H_phi_array(0,1)-accu_r_htot) + test_r_hmono += dabs(i_H_phi_array(1,1)-accu_r_hmono) + test_r_htwoe += dabs(i_H_phi_array(2,1)-accu_r_htwoe) + test_r_hthree += dabs(i_H_phi_array(3,1)-accu_r_hthree) + + enddo + + test_l_htot *= 1.D0/dble(N_det) + test_l_hmono *= 1.D0/dble(N_det) + test_l_htwoe *= 1.D0/dble(N_det) + test_l_hthree *= 1.D0/dble(N_det) + + test_r_htot *= 1.D0/dble(N_det) + test_r_hmono *= 1.D0/dble(N_det) + test_r_htwoe *= 1.D0/dble(N_det) + test_r_hthree *= 1.D0/dble(N_det) + + print*,'**************************' + print*,'test_l_htot = ',test_l_htot + print*,'test_l_hmono = ',test_l_hmono + print*,'test_l_htwoe = ',test_l_htwoe + print*,'test_l_hthree = ',test_l_hthree + print*,'**************************' + print*,'test_r_htot = ',test_r_htot + print*,'test_r_hmono = ',test_r_hmono + print*,'test_r_htwoe = ',test_r_htwoe + print*,'test_r_hthree = ',test_r_hthree + +end + +subroutine routine_grad_num + implicit none + integer :: indx,ihole,ipart + integer :: p,q + double precision :: accu_l, accu_r + double precision :: contrib_l, contrib_r + + accu_l = 0.d0 + accu_r = 0.d0 + do indx=1,nMonoEx + q = excit(1,indx) + p = excit(2,indx) + contrib_l = dabs(dabs(gradvec_detail_left_old(0,indx)) - 2.D0 * dabs( Fock_matrix_tc_mo_tot(q,p))) + contrib_r = dabs(dabs(gradvec_detail_right_old(0,indx)) -2.D0 * dabs( Fock_matrix_tc_mo_tot(p,q))) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,indx,q,p + print*,gradvec_detail_left_old(0,indx),gradvec_detail_right_old(0,indx) + print*,2.D0* Fock_matrix_tc_mo_tot(q,p), 2.d0* Fock_matrix_tc_mo_tot(p,q) + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + +! do i = 1, nMonoEx +! print*,i,gradvec_old(i) +! enddo + +end + +subroutine routine_grad_num_dm_one_body + implicit none + integer :: indx,ii,i,a,aa,tt,t,ibody + double precision :: accu_l, accu_r,ref_r, new_r, ref_l, new_l + double precision :: contrib_l, contrib_r + double precision :: res_l(0:3),res_r(0:3) + + ibody = 2 ! check only the two-body term + provide gradvec_detail_left_old gradvec_tc_l + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->virtual' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do aa = 1, n_virt_orb + indx = mat_idx_c_v(ii,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + i = list_core_inact(ii) + a = list_virt(aa) +! if(i==1.and.a==9)then +! print*,i,a,ref_r, new_r +! stop +! endif + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + print*,indx,i,a,ii,aa + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,gradvec_detail_left_old(0,indx),gradvec_tc_l(0,indx) + print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + print*,'**************************' + print*,'**************************' + endif + + ibody = 2 ! check only the two-body term + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing inactive-->active' + accu_l = 0.d0 + accu_r = 0.d0 + do ii = 1, n_core_inact_orb + do tt = 1, n_act_orb + indx = mat_idx_c_a(ii,tt) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + i = list_core_inact(ii) + t = list_act(tt) + print*,indx,i,t + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + if(.True.)then + print*,'**************************' + print*,'**************************' + print*,'testing active-->virtual ' + accu_l = 0.d0 + accu_r = 0.d0 + do tt = 1, n_act_orb + do aa = 1, n_virt_orb + indx = mat_idx_a_v(tt,aa) + ref_l = gradvec_detail_left_old(ibody,indx) + new_l = gradvec_tc_l(ibody,indx) + contrib_l = dabs(dabs(ref_l) - dabs(new_l)) + ref_r = gradvec_detail_right_old(ibody,indx) + new_r = gradvec_tc_r(ibody,indx) + contrib_r = dabs(dabs(ref_r) - dabs(new_r)) + if(contrib_l.gt.1.d-10.or.contrib_r.gt.1.d-10)then + print*,'---------' + print*,'warning !' + a = list_virt(aa) + t = list_act(tt) + print*,indx,t,a + print*,ref_l, new_l, contrib_l + print*,ref_r, new_r, contrib_r +! print*,gradvec_detail_right_old(0,indx),gradvec_tc_r(0,indx) + print*,'---------' + endif + accu_l += contrib_l + accu_r += contrib_r + enddo + enddo + print*,'accu_l,accu_r' + print*,accu_l,accu_r + endif + + +end diff --git a/src/ccsd/ccsd_space_orb_sub.irp.f b/src/ccsd/ccsd_space_orb_sub.irp.f index 40c57188..b48ca7da 100644 --- a/src/ccsd/ccsd_space_orb_sub.irp.f +++ b/src/ccsd/ccsd_space_orb_sub.irp.f @@ -9,7 +9,7 @@ subroutine run_ccsd_space_orb double precision :: uncorr_energy,energy, max_elem, max_r, max_r1, max_r2,ta,tb logical :: not_converged - double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:) + double precision, allocatable :: t2(:,:,:,:), r2(:,:,:,:), tau(:,:,:,:), tau_x(:,:,:,:) double precision, allocatable :: t1(:,:), r1(:,:) double precision, allocatable :: H_oo(:,:), H_vv(:,:), H_vo(:,:) @@ -18,7 +18,12 @@ subroutine run_ccsd_space_orb integer(bit_kind) :: det(N_int,2) integer :: nO, nV, nOa, nVa -! PROVIDE mo_two_e_integrals_in_map + if (do_ao_cholesky) then + PROVIDE cholesky_mo_transp + FREE cholesky_ao + else + PROVIDE mo_two_e_integrals_in_map + endif det = psi_det(:,:,cc_ref) print*,'Reference determinant:' @@ -46,13 +51,39 @@ subroutine run_ccsd_space_orb allocate(t2(nO,nO,nV,nV), r2(nO,nO,nV,nV)) allocate(tau(nO,nO,nV,nV)) + allocate(tau_x(nO,nO,nV,nV)) allocate(t1(nO,nV), r1(nO,nV)) allocate(H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)) if (cc_update_method == 'diis') then - allocate(all_err(nO*nV+nO*nO*nV*nV,cc_diis_depth), all_t(nO*nV+nO*nO*nV*nV,cc_diis_depth)) - all_err = 0d0 - all_t = 0d0 + double precision :: rss, diis_mem, extra_mem + double precision, external :: memory_of_double + diis_mem = 2.d0*memory_of_double(nO*nV)*(1.d0+nO*nV) + call resident_memory(rss) + do while (cc_diis_depth > 1) + if (rss + diis_mem * cc_diis_depth > qp_max_mem) then + cc_diis_depth = cc_diis_depth - 1 + else + exit + endif + end do + if (cc_diis_depth <= 1) then + print *, 'Not enough memory for DIIS' + stop -1 + endif + print *, 'DIIS size ', cc_diis_depth + + allocate(all_err(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth), all_t(nO*nV+nO*nO*nV*(nV*1_8),cc_diis_depth)) + !$OMP PARALLEL PRIVATE(i,j) DEFAULT(SHARED) + do j=1,cc_diis_depth + !$OMP DO + do i=1, size(all_err,1) + all_err(i,j) = 0d0 + all_t(i,j) = 0d0 + enddo + !$OMP END DO NOWAIT + enddo + !$OMP END PARALLEL endif if (elec_alpha_num /= elec_beta_num) then @@ -67,10 +98,11 @@ subroutine run_ccsd_space_orb call guess_t1(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_f_ov,t1) call guess_t2(nO,nV,cc_space_f_o,cc_space_f_v,cc_space_v_oovv,t2) call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) !print*,'hf_energy', hf_energy call det_energy(det,uncorr_energy) print*,'Det energy', uncorr_energy - call ccsd_energy_space(nO,nV,tau,t1,energy) + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) print*,'Guess energy', uncorr_energy+energy, energy nb_iter = 0 @@ -85,13 +117,23 @@ subroutine run_ccsd_space_orb do while (not_converged) - call compute_H_oo(nO,nV,t1,t2,tau,H_oo) - call compute_H_vv(nO,nV,t1,t2,tau,H_vv) - call compute_H_vo(nO,nV,t1,t2,H_vo) - ! Residue - call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) - call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + if (do_ao_cholesky) then +! if (.False.) then + call compute_H_oo_chol(nO,nV,tau_x,H_oo) + call compute_H_vv_chol(nO,nV,tau_x,H_vv) + call compute_H_vo_chol(nO,nV,t1,H_vo) + + call compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + else + call compute_H_oo(nO,nV,t1,t2,tau,H_oo) + call compute_H_vv(nO,nV,t1,t2,tau,H_vv) + call compute_H_vo(nO,nV,t1,t2,H_vo) + + call compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + call compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + endif max_r = max(max_r1,max_r2) ! Update @@ -109,10 +151,11 @@ subroutine run_ccsd_space_orb endif call update_tau_space(nO,nV,t1,t2,tau) + call update_tau_x_space(nO,nV,tau,tau_x) ! Energy - call ccsd_energy_space(nO,nV,tau,t1,energy) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' + call ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' nb_iter = nb_iter + 1 if (max_r < cc_thresh_conv .or. nb_iter > cc_max_iter) then @@ -132,7 +175,7 @@ subroutine run_ccsd_space_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then @@ -239,6 +282,51 @@ subroutine ccsd_energy_space(nO,nV,tau,t1,energy) end +subroutine ccsd_energy_space_x(nO,nV,tau_x,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau_x(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau_x,t1,& + !$omp cc_space_f_vo,cc_space_v_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau_x(i,j,a,b) * cc_space_v_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + ! Tau subroutine update_tau_space(nO,nV,t1,t2,tau) @@ -274,6 +362,39 @@ subroutine update_tau_space(nO,nV,t1,t2,tau) end +subroutine update_tau_x_space(nO,nV,tau,tau_x) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau_x(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,tau_x) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau_x(i,j,a,b) = 2.d0*tau(i,j,a,b) - tau(i,j,b,a) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + ! R1 subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) @@ -449,25 +570,16 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) ! enddo ! enddo !enddo + + integer :: iblock, block_size, nVmax double precision, allocatable :: W_vvov(:,:,:,:), T_vvoo(:,:,:,:) - allocate(W_vvov(nV,nV,nO,nV), T_vvoo(nV,nV,nO,nO)) + block_size = 8 + allocate(W_vvov(nV,nV,nO,block_size), T_vvoo(nV,nV,nO,nO)) !$omp parallel & !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau) & !$omp private(b,beta,i,a) & !$omp default(none) - !$omp do - do beta = 1, nV - do i = 1, nO - do b = 1, nV - do a = 1, nV - W_vvov(a,b,i,beta) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) - enddo - enddo - enddo - enddo - !$omp end do nowait - !$omp do do u = 1, nO do i = 1, nO @@ -481,10 +593,30 @@ subroutine compute_r1_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) !$omp end do nowait !$omp end parallel - call dgemm('T','N',nO,nV,nO*nV*nV, & - 1d0, T_vvoo, size(T_vvoo,1) * size(T_vvoo,2) * size(T_vvoo,3), & - W_vvov, size(W_vvov,1) * size(W_vvov,2) * size(W_vvov,3), & - 1d0, r1 , size(r1,1)) + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_vvov,W_vvov,T_vvoo,tau,nVmax,iblock) & + !$omp private(b,i,a,beta) & + !$omp default(none) + !$omp do collapse(2) + do beta = iblock, iblock + nVmax - 1 + do i = 1, nO + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta-iblock+1) = 2d0 * cc_space_v_vvov(a,b,i,beta) - cc_space_v_vvov(b,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo deallocate(W_vvov,T_vvoo) @@ -839,6 +971,10 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) ! allocate(B1(nV,nV,nV,nV)) ! call compute_B1(nO,nV,t1,t2,B1) +! call dgemm('N','N',nO*nO,nV*nV,nV*nV, & +! 1d0, tau, size(tau,1) * size(tau,2), & +! B1 , size(B1_gam,1) * size(B1_gam,2), & +! 1d0, r2, size(r2,1) * size(r2,2)) allocate(B1_gam(nV,nV,nV)) do gam=1,nV call compute_B1_gam(nO,nV,t1,t2,B1_gam,gam) @@ -1323,7 +1459,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) !enddo !$omp parallel & - !$omp shared(nO,nV,K1,X_ovov,Z_ovov,t2) & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & !$omp private(u,v,gam,beta,i,a) & !$omp default(none) !$omp do @@ -1343,7 +1479,7 @@ subroutine compute_r2_space(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) do v = 1, nO do a = 1, nV do i = 1, nO - Z_ovov(i,a,v,beta) = t2(i,v,beta,a) + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) enddo enddo enddo @@ -1547,21 +1683,29 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) ! enddo double precision, allocatable :: X_vvvo(:,:,:), Y_vvvv(:,:,:) + allocate(X_vvvo(nV,nV,nO), Y_vvvv(nV,nV,nV)) ! ! B1(a,b,beta,gam) = cc_space_v_vvvv(a,b,beta,gam) + + call gen_v_space(cc_nVa,cc_nVa,cc_nVa,1, & + cc_list_vir,cc_list_vir,cc_list_vir,cc_list_vir(gam), B1) + + !$omp parallel & !$omp shared(nO,nV,B1,cc_space_v_vvvv,cc_space_v_vvov,X_vvvo,gam) & !$omp private(a,b,beta) & !$omp default(none) - !$omp do - do beta = 1, nV - do b = 1, nV - do a = 1, nV - B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) - enddo - enddo - enddo - !$omp end do nowait + +! !$omp do +! do beta = 1, nV +! do b = 1, nV +! do a = 1, nV +! B1(a,b,beta) = cc_space_v_vvvv(a,b,beta,gam) +! enddo +! enddo +! enddo +! !$omp end do nowait + do i = 1, nO !$omp do do b = 1, nV @@ -1569,7 +1713,7 @@ subroutine compute_B1_gam(nO,nV,t1,t2,B1,gam) X_vvvo(a,b,i) = cc_space_v_vvov(a,b,i,gam) enddo enddo - !$omp end do nowait + !$omp end do enddo !$omp end parallel diff --git a/src/ccsd/ccsd_space_orb_sub_chol.irp.f b/src/ccsd/ccsd_space_orb_sub_chol.irp.f new file mode 100644 index 00000000..b59dc0bb --- /dev/null +++ b/src/ccsd/ccsd_space_orb_sub_chol.irp.f @@ -0,0 +1,1458 @@ +subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy) + + implicit none + + integer, intent(in) :: nO, nV + double precision, intent(in) :: tau(nO,nO,nV,nV) + double precision, intent(in) :: t1(nO,nV) + double precision, intent(out) :: energy + + ! internal + integer :: i,j,a,b + double precision :: e + + energy = 0d0 + !$omp parallel & + !$omp shared(nO,nV,energy,tau,t1,& + !$omp cc_space_f_vo,cc_space_w_oovv) & + !$omp private(i,j,a,b,e) & + !$omp default(none) + e = 0d0 + !$omp do + do a = 1, nV + do i = 1, nO + e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a) + enddo + enddo + !$omp end do nowait + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + energy = energy + e + !$omp end critical + !$omp end parallel + +end + +! Tau + +subroutine update_tau_space_chol(nO,nV,t1,t2,tau) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV) + + ! out + double precision, intent(out) :: tau(nO,nO,nV,nV) + + ! internal + integer :: i,j,a,b + + !$OMP PARALLEL & + !$OMP SHARED(nO,nV,tau,t2,t1) & + !$OMP PRIVATE(i,j,a,b) & + !$OMP DEFAULT(NONE) + !$OMP DO + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + +end + +! R1 + +subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r1(nO,nV), max_r1 + + ! internal + integer :: u,i,j,beta,a,b + + !$omp parallel & + !$omp shared(nO,nV,r1,cc_space_f_ov) & + !$omp private(u,beta) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + r1(u,beta) = cc_space_f_ov(u,beta) + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: X_oo(:,:) + allocate(X_oo(nO,nO)) + call dgemm('N','N', nO, nO, nV, & + -2d0, t1 , size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 0d0, X_oo , size(X_oo,1)) + + call dgemm('T','N', nO, nV, nO, & + 1d0, X_oo, size(X_oo,2), & + t1 , size(t1,1), & + 1d0, r1 , size(r1,1)) + deallocate(X_oo) + + call dgemm('N','N', nO, nV, nV, & + 1d0, t1 , size(t1,1), & + H_vv, size(H_vv,1), & + 1d0, r1 , size(r1,1)) + + call dgemm('N','N', nO, nV, nO, & + -1d0, H_oo, size(H_oo,1), & + t1 , size(t1,1), & + 1d0, r1, size(r1,1)) + + double precision, allocatable :: X_voov(:,:,:,:) + allocate(X_voov(nV, nO, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,X_voov,t2,t1) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nV*nO, nO*nV, & + 1d0, X_voov, size(X_voov,1) * size(X_voov,2), & + H_vo , 1, & + 1d0, r1 , 1) + + deallocate(X_voov) + + double precision, allocatable :: X_ovov(:,:,:,:) + allocate(X_ovov(nO, nV, nO, nV)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) & + !$omp private(u,beta,i,a) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nv + do i = 1, nO + X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemv('T', nO*nV, nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + t1 , 1, & + 1d0, r1 , 1) + + deallocate(X_ovov) + + integer :: iblock, block_size, nVmax + double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:) + block_size = 16 + allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO)) + + !$omp parallel & + !$omp private(u,i,b,a) & + !$omp default(shared) + !$omp do + do u = 1, nO + do i = 1, nO + do b = 1, nV + do a = 1, nV + T_vvoo(a,b,i,u) = tau(i,u,a,b) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + do iblock = 1, nV, block_size + nVmax = min(block_size,nV-iblock+1) + + call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol , cholesky_mo_num, & + cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + 0.d0, W_vvov_tmp, nV*nO) + + !$omp parallel & + !$omp private(b,i,a,beta) & + !$omp default(shared) + do beta = 1, nVmax + do i = 1, nO + !$omp do + do b = 1, nV + do a = 1, nV + W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta) + enddo + enddo + !$omp end do nowait + enddo + enddo + !$omp barrier + !$omp end parallel + + call dgemm('T','N',nO,nVmax,nO*nV*nV, & + 1d0, T_vvoo, nV*nV*nO, & + W_vvov, nO*nV*nV, & + 1d0, r1(1,iblock), nO) + enddo + + deallocate(W_vvov,T_vvoo) + + + double precision, allocatable :: W_oovo(:,:,:,:) + allocate(W_oovo(nO,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,cc_space_v_oovo,W_oovo) & + !$omp private(u,a,i,j) & + !$omp default(none) + do u = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do i = 1, nO +! W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i) + W_oovo(i,j,a,u) = 2d0 * cc_space_v_oovo(i,j,a,u) - cc_space_v_oovo(j,i,a,u) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('T','N', nO, nV, nO*nO*nV, & + -1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), & + tau , size(tau,1) * size(tau,2) * size(tau,3), & + 1d0, r1 , size(r1,1)) + + deallocate(W_oovo) + + max_r1 = 0d0 + do a = 1, nV + do i = 1, nO + max_r1 = max(dabs(r1(i,a)), max_r1) + enddo + enddo + + ! Change the sign for consistency with the code in spin orbitals + !$omp parallel & + !$omp shared(nO,nV,r1) & + !$omp private(a,i) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + r1(i,a) = -r1(i,a) + enddo + enddo + !$omp end do + !$omp end parallel + +end + +! H_oo + +subroutine compute_H_oo_chol(nO,nV,tau_x,H_oo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: tau_x(nO, nO, nV, nV) + double precision, intent(out) :: H_oo(nO, nO) + + integer :: a,b,i,j,u,k + + double precision, allocatable :: tau_kau(:,:,:), tmp_vov(:,:,:) + + allocate(tau_kau(cholesky_mo_num,nV,nO)) + !$omp parallel & + !$omp default(shared) & + !$omp private(i,u,j,k,a,b,tmp_vov) + allocate(tmp_vov(nV,nO,nV) ) + !$omp do + do u = 1, nO + do b=1,nV + do j=1,nO + do a=1,nV + tmp_vov(a,j,b) = tau_x(u,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_mo_num,nV,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_vov, nV, & + 0.d0, tau_kau(1,1,u), cholesky_mo_num) + enddo + !$omp end do nowait + deallocate(tmp_vov) + !$omp do + do i = 1, nO + do u = 1, nO + H_oo(u,i) = cc_space_f_oo(u,i) + enddo + enddo + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nO, nO, cholesky_mo_num*nV, 1.d0, & + tau_kau, cholesky_mo_num*nV, cc_space_v_vo_chol, cholesky_mo_num*nV, & + 1.d0, H_oo, nO) + +end + +! H_vv + +subroutine compute_H_vv_chol(nO,nV,tau_x,H_vv) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: tau_x(nO, nO, nV, nV) + double precision, intent(out) :: H_vv(nV, nV) + + integer :: a,b,i,j,u,k, beta + + double precision, allocatable :: tau_kia(:,:,:), tmp_oov(:,:,:) + + allocate(tau_kia(cholesky_mo_num,nO,nV)) + !$omp parallel & + !$omp default(shared) & + !$omp private(i,beta,j,k,a,b,tmp_oov) + allocate(tmp_oov(nO,nO,nV) ) + !$omp do + do a = 1, nV + do b=1,nV + do j=1,nO + do i=1,nO + tmp_oov(i,j,b) = tau_x(i,j,a,b) + enddo + enddo + enddo + call dgemm('N','T',cholesky_mo_num,nO,nO*nV,1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, tmp_oov, nO, & + 0.d0, tau_kia(1,1,a), cholesky_mo_num) + enddo + !$omp end do nowait + deallocate(tmp_oov) + + !$omp do + do beta = 1, nV + do a = 1, nV + H_vv(a,beta) = cc_space_f_vv(a,beta) + enddo + enddo + !$omp end do nowait + !$omp barrier + !$omp end parallel + call dgemm('T', 'N', nV, nV, cholesky_mo_num*nO, -1.d0, & + tau_kia, cholesky_mo_num*nO, cc_space_v_ov_chol, cholesky_mo_num*nO, & + 1.d0, H_vv, nV) + +end + +! H_vo +subroutine compute_H_vo_chol(nO,nV,t1,H_vo) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(out) :: H_vo(nV, nO) + + integer :: a,b,i,j,u,k + + double precision, allocatable :: tmp_k(:), tmp(:,:,:), tmp2(:,:,:) + do i=1,nO + do a=1,nV + H_vo(a,i) = cc_space_f_vo(a,i) + enddo + enddo + + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N', 'N', cholesky_mo_num, 1, nO*nV, 2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + + call dgemm('T','N',nV*nO,1,cholesky_mo_num,1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & + H_vo, nV*nO) + deallocate(tmp_k) + + allocate(tmp(cholesky_mo_num,nO,nO)) + allocate(tmp2(cholesky_mo_num,nO,nO)) + + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, tmp, cholesky_mo_num*nO) + + do i=1,nO + do j=1,nO + do k=1,cholesky_mo_num + tmp2(k,j,i) = tmp(k,i,j) + enddo + enddo + enddo + deallocate(tmp) + + call dgemm('T','N', nV, nO, cholesky_mo_num*nO, -1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, tmp2, cholesky_mo_num*nO, & + 1.d0, H_vo, nV) + +end + + +! R2 + +subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2) + + implicit none + + ! in + integer, intent(in) :: nO, nV + double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV) + double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO) + + ! out + double precision, intent(out) :: r2(nO,nO,nV,nV), max_r2 + + ! internal + integer :: u,v,i,j,beta,gam,a,b + double precision :: max_r2_local + + call set_multiple_levels_omp(.False.) + + !$omp parallel & + !$omp shared(nO,nV,r2,cc_space_v_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = cc_space_v_oovv(u,v,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: A1(:,:,:,:) + allocate(A1(nO,nO,nO,nO)) + call compute_A1_chol(nO,nV,t1,t2,tau,A1) + call dgemm('N','N',nO*nO,nV*nV,nO*nO, & + 1d0, A1, size(A1,1) * size(A1,2), & + tau, size(tau,1) * size(tau,2), & + 1d0, r2, size(r2,1) * size(r2,2)) + + deallocate(A1) + integer :: block_size, iblock, k + block_size = 16 + double precision, dimension(:,:,:), allocatable :: B1, tmp_cc, tmpB1 + double precision, dimension(:,:), allocatable :: tmp_cc2 + + allocate(tmp_cc(cholesky_mo_num,nV,nV)) + call dgemm('N','N', cholesky_mo_num*nV, nV, nO, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_cc, cholesky_mo_num*nV) + + call set_multiple_levels_omp(.False.) + + !$OMP PARALLEL PRIVATE(gam, iblock, B1, tmpB1, tmp_cc2, beta, b, a) + allocate(B1(nV,nV,block_size), tmpB1(nV,block_size,nV), tmp_cc2(cholesky_mo_num,nV)) + !$OMP DO + do gam = 1, nV + + do a=1,nV + do k=1,cholesky_mo_num + tmp_cc2(k,a) = cc_space_v_vv_chol(k,a,gam) - tmp_cc(k,a,gam) + enddo + enddo + + do iblock = 1, nV, block_size + + call dgemm('T', 'N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + -1.d0, tmp_cc(1,1,iblock), cholesky_mo_num, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, & + 0.d0, tmpB1, nV*block_size) + + call dgemm('T','N', nV*min(block_size, nV-iblock+1), nV, cholesky_mo_num, & + 1.d0, cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, & + tmp_cc2, cholesky_mo_num, & + 1.d0, tmpB1, nV*block_size) + + do beta = iblock, min(nV, iblock+block_size-1) + do b = 1, nV + do a = 1, nV + B1(a,b,beta-iblock+1) = tmpB1(a,beta-iblock+1,b) + enddo + enddo + enddo + + call dgemm('N','N',nO*nO,min(block_size, nV-iblock+1),nV*nV, & + 1d0, tau, size(tau,1) * size(tau,2), & + B1 , size(B1 ,1) * size(B1 ,2), & + 1d0, r2(1,1,iblock,gam), size(r2 ,1) * size(r2 ,2)) + enddo + + enddo + !$OMP ENDDO + + deallocate(B1, tmpB1, tmp_cc2) + !$OMP END PARALLEL + + deallocate(tmp_cc) + + + double precision, allocatable :: X_oovv(:,:,:,:) + allocate(X_oovv(nO,nO,nV,nV)) + !$omp parallel & + !$omp shared(nO,nV,t2,X_oovv) & + !$omp private(u,v,gam,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + X_oovv(u,v,gam,a) = t2(u,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: g_vir(:,:) + allocate(g_vir(nV,nV)) + call compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + double precision, allocatable :: Y_oovv(:,:,:,:) + allocate(Y_oovv(nO,nO,nV,nV)) + + call dgemm('N','N',nO*nO*nV,nV,nV, & + 1d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3), & + g_vir, size(g_vir,1), & + 0d0, Y_oovv, size(Y_oovv,1) * size(Y_oovv,2) * size(Y_oovv,3)) + deallocate(g_vir) + deallocate(X_oovv) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(u,v,beta,gam) + Y_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Y_oovv) + + double precision, allocatable :: g_occ(:,:) + allocate(g_occ(nO,nO)) + call compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO,nO*nV*nV,nO, & + 1d0, g_occ , size(g_occ,1), & + t2 , size(t2,1), & + 0d0, X_oovv, size(X_oovv,1)) + deallocate(g_occ) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_oovv) + + double precision, allocatable :: X_vovv(:,:,:,:) + + allocate(X_vovv(nV,nO,nV,block_size)) + allocate(Y_oovv(nO,nO,nV,nV)) + + do iblock = 1, nV, block_size + do gam = iblock, min(nV, iblock+block_size-1) + call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, & + cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV) + + enddo + call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, & + 1d0, t1 , size(t1,1), & + X_vovv, size(X_vovv,1), & + 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1)) + + enddo + deallocate(X_vovv) + + !$omp parallel & + !$omp shared(nO,nV,r2,Y_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Y_oovv) + + double precision, allocatable :: X_ovvo(:,:,:,:) + double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:) + allocate(tcc2(cholesky_mo_num,nV,nO), X_ovvo(nO,nV,nV,nO)) + allocate(tcc(cholesky_mo_num,nO,nV)) + + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, & + 0.d0, tcc2, cholesky_mo_num*nV) + + call dgemm('N','N', cholesky_mo_num*nO, nV, nO, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num*nO, t1, nO, & + 0.d0, tcc, cholesky_mo_num*nO) + + call dgemm('T','N', nO*nV, nV*nO, cholesky_mo_num, 1.d0, & + tcc, cholesky_mo_num, tcc2, cholesky_mo_num, 0.d0, & + X_ovvo, nO*nV) + + deallocate(tcc, tcc2) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_ovvo) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp do + do beta = 1, nV + do gam = 1, nV + do v = 1, nO + do u = 1, nO + r2(v,u,gam,beta) = r2(v,u,gam,beta) - X_ovvo(u,beta,gam,v) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(X_ovvo) + !----- + + allocate(X_oovv(nO,nO,nV,nV)) + + call dgemm('N','N',nO*nO*nV,nV,nO, & + 1d0, cc_space_v_oovo, size(cc_space_v_oovo,1) * size(cc_space_v_oovo,2) * size(cc_space_v_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,beta,gam) - X_oovv(v,u,gam,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_oovv) + + double precision, allocatable :: X_vovo(:,:,:,:), Y_oovo(:,:,:,:) + allocate(X_vovo(nV,nO,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,X_vovo,cc_space_v_ovvo) & + !$omp private(a,v,gam,i) & + !$omp default(none) + do i = 1, nO + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + X_vovo(a,v,gam,i) = cc_space_v_ovvo(v,a,gam,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + allocate(Y_oovo(nO,nO,nV,nO)) + call dgemm('N','N',nO,nO*nV*nO,nV, & + 1d0, t1, size(t1,1), & + X_vovo, size(X_vovo,1), & + 0d0, Y_oovo, size(Y_oovo,1)) + + deallocate(X_vovo) + allocate(X_oovv(nO,nO,nV,nV)) + call dgemm('N','N',nO*nO*nV, nV, nO, & + 1d0, Y_oovo, size(Y_oovo,1) * size(Y_oovo,2) * size(Y_oovo,3), & + t1 , size(t1,1), & + 0d0, X_oovv, size(X_oovv,1) * size(X_oovv,2) * size(X_oovv,3)) + deallocate(Y_oovo) + + !$omp parallel & + !$omp shared(nO,nV,r2,X_oovv) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - X_oovv(u,v,gam,beta) - X_oovv(v,u,beta,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(X_oovv) + + + double precision, allocatable :: J1(:,:,:,:) + allocate(J1(nO,nV,nV,nO)) + call compute_J1_chol(nO,nV,t1,t2,cc_space_v_ovvo,cc_space_v_ovoo, & + cc_space_v_vvoo,J1) + + double precision, allocatable :: K1(:,:,:,:) + allocate(K1(nO,nV,nO,nV)) + call compute_K1_chol(nO,nV,t1,t2,cc_space_v_ovoo,cc_space_v_vvoo, & + cc_space_v_ovov,K1) + + allocate(X_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) + do i = 1, nO + !$omp do + do a = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,a,i) = (J1(u,a,beta,i) - 0.5d0 * K1(u,a,i,beta)) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(J1) + + double precision, allocatable :: Y_voov(:,:,:,:) + allocate(Y_voov(nV,nO,nO,nV)) + + !$omp parallel & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(shared) + !$omp do + do gam = 1, nV + do v = 1, nO + do i = 1, nO + do a = 1, nV + Y_voov(a,i,v,gam) = 2d0 * t2(i,v,a,gam) - t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + double precision, allocatable :: Z_ovov(:,:,:,:) + allocate(Z_ovov(nO,nV,nO,nV)) + + call dgemm('N','N', nO*nV,nO*nV,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_voov, size(Y_voov,1) * size(Y_voov,2), & + 0d0, Z_ovov, size(Z_ovov,1) * size(Z_ovov,2)) + + deallocate(X_ovvo,Y_voov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) + Z_ovov(u,beta,v,gam) + Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Z_ovov) + + double precision, allocatable :: Y_ovov(:,:,:,:), X_ovov(:,:,:,:) + allocate(X_ovov(nO,nV,nO,nV)) + allocate(Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,r2,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,a,i,beta,gam) & + !$omp default(none) + !$omp do + do beta = 1, nV + do u = 1, nO + do a = 1, nV + do i = 1, nO + X_ovov(i,a,u,beta) = 0.5d0 * K1(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do gam = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,gam) = t2(i,v,gam,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('T','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + deallocate(X_ovov, Y_ovov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,beta,v,gam) - Z_ovov(v,gam,u,beta) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + deallocate(Z_ovov) + + allocate(X_ovov(nO,nV,nO,nV),Y_ovov(nO,nV,nO,nV)) + !$omp parallel & + !$omp shared(nO,nV,K1,X_ovov,Y_ovov,t2) & + !$omp private(u,v,gam,beta,i,a) & + !$omp default(none) + !$omp do + do a = 1, nV + do i = 1, nO + do gam = 1, nV + do u = 1, nO + X_ovov(u,gam,i,a) = K1(u,a,i,gam) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do beta = 1, nV + do v = 1, nO + do a = 1, nV + do i = 1, nO + Y_ovov(i,a,v,beta) = t2(i,v,beta,a) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(K1) + + allocate(Z_ovov(nO,nV,nO,nV)) + call dgemm('N','N',nO*nV,nO*nV,nO*nV, & + 1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), & + Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + 0d0, Z_ovov, size(Y_ovov,1) * size(Y_ovov,2)) + + deallocate(X_ovov,Y_ovov) + + !$omp parallel & + !$omp shared(nO,nV,r2,Z_ovov) & + !$omp private(u,v,gam,beta) & + !$omp default(none) + !$omp do + do gam = 1, nV + do beta = 1, nV + do v = 1, nO + do u = 1, nO + r2(u,v,beta,gam) = r2(u,v,beta,gam) - Z_ovov(u,gam,v,beta) - Z_ovov(v,beta,u,gam) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Z_ovov) + + ! Change the sign for consistency with the code in spin orbitals + + max_r2 = 0d0 + !$omp parallel & + !$omp shared(nO,nV,r2,max_r2) & + !$omp private(i,j,a,b,max_r2_local) & + !$omp default(none) + max_r2_local = 0.d0 + !$omp do + do b = 1, nV + do a = 1, nV + do j = 1, nO + do i = 1, nO + r2(i,j,a,b) = -r2(i,j,a,b) + max_r2_local = max(r2(i,j,a,b), max_r2_local) + enddo + enddo + enddo + enddo + !$omp end do nowait + !$omp critical + max_r2 = max(max_r2, max_r2_local) + !$omp end critical + !$omp end parallel + +end + +! A1 + +subroutine compute_A1_chol(nO,nV,t1,t2,tau,A1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: tau(nO, nO, nV, nV) + double precision, intent(out) :: A1(nO, nO, nO, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta + + double precision, allocatable :: Y_oooo(:,:,:,:) + allocate(Y_oooo(nO,nO,nO,nO)) + + ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & + + call dgemm('N','N', nO, nO*nO*nO, nV, & + 1d0, t1 , size(t1,1), & + cc_space_v_vooo, size(cc_space_v_vooo,1), & + 0d0, Y_oooo, size(Y_oooo,1)) + + !$omp parallel & + !$omp private(u,v,i,j) & + !$omp default(shared) + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do v = 1, nO + do u = 1, nO + A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(Y_oooo) + + ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) + call dgemm('N','N', nO*nO, nO*nO, nV*nV, & + 1d0, tau , size(tau,1) * size(tau,2), & + cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & + 1d0, A1 , size(A1,1) * size(A1,2)) + +end + +! g_occ + +subroutine compute_g_occ_chol(nO,nV,t1,t2,H_oo,g_occ) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_oo(nO, nO) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_occ(nO, nO) + + g_occ = H_oo + + call dgemm('N','N',nO,nO,nV, & + 1d0, t1, size(t1,1), & + cc_space_f_vo, size(cc_space_f_vo,1), & + 1d0, g_occ, size(g_occ,1)) + + double precision, allocatable :: X(:) + allocate(X(cholesky_mo_num)) + call dgemv('N',cholesky_mo_num,nO*nV,2.d0, & + cc_space_v_ov_chol, cholesky_mo_num, & + t1, 1, 0.d0, X, 1) + + call dgemv('T',cholesky_mo_num,nO*nO,1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + X, 1, 1.d0, g_occ, 1) + deallocate(X) + + call dgemv('T',nO*nV,nO*nO,-1.d0, & + cc_space_v_ovoo, nO*nV, & + t1, 1, 1.d0, g_occ, 1) + +end + +! g_vir + +subroutine compute_g_vir_chol(nO,nV,t1,t2,H_vv,g_vir) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV), H_vv(nV, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(out) :: g_vir(nV, nV) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + call dgemm('N','N',nV,nV,nO, & + -1d0, cc_space_f_vo , size(cc_space_f_vo,1), & + t1 , size(t1,1), & + 0d0, g_vir, size(g_vir,1)) + + double precision, allocatable :: tmp_k(:), tmp_vo(:,:,:), tmp_vo2(:,:,:) + allocate(tmp_k(cholesky_mo_num)) + call dgemm('N','N', cholesky_mo_num, 1, nO*nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num, t1, nO*nV, 0.d0, tmp_k, cholesky_mo_num) + + call dgemm('T','N', nV*nV, 1, cholesky_mo_num, 2.d0, & + cc_space_v_vv_chol, cholesky_mo_num, tmp_k, cholesky_mo_num, 1.d0, & + g_vir, nV*nV) + deallocate(tmp_k) + + allocate(tmp_vo(cholesky_mo_num,nV,nO)) + call dgemm('N','T',cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, t1, nO, 0.d0, tmp_vo, cholesky_mo_num*nV) + + allocate(tmp_vo2(cholesky_mo_num,nO,nV)) + do beta=1,nV + do i=1,nO + do k=1,cholesky_mo_num + tmp_vo2(k,i,beta) = -tmp_vo(k,beta,i) + enddo + enddo + enddo + deallocate(tmp_vo) + + do beta = 1, nV + do a = 1, nV + g_vir(a,beta) = g_vir(a,beta) + H_vv(a,beta) + enddo + enddo + + call dgemm('T','N', nV, nV, nO*cholesky_mo_num, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, & + tmp_vo2, cholesky_mo_num*nO, 1.d0, g_vir, nV) + +end + +! J1 + +subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1) + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_ovvo(nO,nV,nV,nO), v_ovoo(nO,nV,nO,nO) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO) + double precision, intent(out) :: J1(nO, nV, nV, nO) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + double precision, allocatable :: X_ovoo(:,:,:,:), Y_ovov(:,:,:,:) + allocate(X_ovoo(nO,nV,nO,nO),Y_ovov(nO,nV,nO,nV)) + + !$omp parallel & + !$omp shared(nO,nV,J1,v_ovvo,v_ovoo,X_ovoo) & + !$omp private(i,j,a,u,beta) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = v_ovvo(u,a,beta,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !$omp do collapse(2) + do j = 1, nO + do i = 1, nO + do a = 1, nV + do u = 1, nO + X_ovoo(u,a,i,j) = v_ovoo(u,a,j,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, X_ovoo, size(X_ovoo,1) * size(X_ovoo,2) * size(X_ovoo,3), & + t1 , size(t1,1), & + 0d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2) * size(Y_ovov,3)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Y_ovov) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Y_ovov(u,a,i,beta) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + deallocate(X_ovoo) + + double precision, allocatable :: tmp_cc(:,:,:), J1_tmp(:,:,:,:) + allocate(tmp_cc(cholesky_mo_num,nV,nO), J1_tmp(nV,nO,nV,nO)) + + call dgemm('N','T', cholesky_mo_num*nV, nO, nV, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num*nV, & + t1, nO, & + 0.d0, tmp_cc, cholesky_mo_num*nV) + + call dgemm('T','N', nV*nO, nV*nO, cholesky_mo_num, 1.d0, & + tmp_cc, cholesky_mo_num, cc_space_v_vo_chol, cholesky_mo_num, & + 0.d0, J1_tmp, nV*nO) + + deallocate(tmp_cc) + + do i=1,nO + do b=1,nV + do a=1,nV + do u=1,nO + J1(u,a,b,i) = J1(u,a,b,i) + J1_tmp(b,u,a,i) + enddo + enddo + enddo + enddo + + deallocate(J1_tmp) + + !- cc_space_v_vvoo(a,b,i,j) * (0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta)) & + double precision, allocatable :: X_voov(:,:,:,:), Z_ovvo(:,:,:,:) + allocate(X_voov(nV,nO,nO,nV), Z_ovvo(nO,nV,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,t2,t1,Y_ovov,X_voov,v_vvoo) & + !$omp private(i,beta,a,u,b,j) & + !$omp default(none) + !$omp do + do b = 1, nV + do j = 1, nO + do beta = 1, nV + do u = 1, nO + Y_ovov(u,beta,j,b) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + !$omp do + do b = 1, nV + do j = 1, nO + do i = 1, nO + do a = 1, nV + X_voov(a,i,j,b) = v_vvoo(a,b,i,j) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nO*nV, & + -1d0, Y_ovov, size(Y_ovov,1) * size(Y_ovov,2), & + X_voov, size(X_voov,1) * size(X_voov,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + deallocate(X_voov) + + double precision, allocatable :: X_ovvo(:,:,:,:), Y_vovo(:,:,:,:) + allocate(X_ovvo(nO,nV,nV,nO),Y_vovo(nV,nO,nV,nO)) + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo,t2,Y_vovo,v_vvoo,X_ovvo) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + + !+ 0.5d0 * (2d0 * cc_space_v_vvoo(a,b,i,j) - cc_space_v_vvoo(b,a,i,j)) * t2(u,j,beta,b) + do j = 1, nO + !$omp do + do b = 1, nV + do i = 1, nO + do a = 1, nV + Y_vovo(a,i,b,j) = 0.5d0 * (2d0 * v_vvoo(a,b,i,j) - v_vvoo(b,a,i,j)) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + X_ovvo(u,beta,b,j) = t2(u,j,beta,b) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + call dgemm('N','T',nO*nV,nV*nO,nV*nO, & + 1d0, X_ovvo, size(X_ovvo,1) * size(X_ovvo,2), & + Y_vovo, size(Y_vovo,1) * size(Y_vovo,2), & + 0d0, Z_ovvo, size(Z_ovvo,1) * size(Z_ovvo,2)) + + !$omp parallel & + !$omp shared(nO,nV,J1,Z_ovvo) & + !$omp private(i,beta,a,u) & + !$omp default(none) + do i = 1, nO + !$omp do + do beta = 1, nV + do a = 1, nV + do u = 1, nO + J1(u,a,beta,i) = J1(u,a,beta,i) + Z_ovvo(u,beta,a,i) + enddo + enddo + enddo + !$omp end do nowait + enddo + !$omp end parallel + + deallocate(X_ovvo,Z_ovvo,Y_ovov) + +end + +! K1 + +subroutine compute_K1_chol(nO,nV,t1,t2,v_ovoo,v_vvoo,v_ovov,K1) + + implicit none + + integer, intent(in) :: nO,nV + double precision, intent(in) :: t1(nO, nV) + double precision, intent(in) :: t2(nO, nO, nV, nV) + double precision, intent(in) :: v_vvoo(nV,nV,nO,nO), v_ovov(nO,nV,nO,nV) + double precision, intent(in) :: v_ovoo(nO,nV,nO,nO) + double precision, intent(out) :: K1(nO, nV, nO, nV) + + double precision, allocatable :: X(:,:,:,:), Y(:,:,:,:), Z(:,:,:,:) + + integer :: a,tmp_a,b,k,l,c,d,tmp_c,tmp_d,i,j,u,v, beta, gam + + allocate(X(nV,nO,nV,nO),Y(nO,nV,nV,nO),Z(nO,nV,nV,nO)) + + !$omp parallel & + !$omp shared(nO,nV,K1,X,Y,v_vvoo,v_ovov,t1,t2) & + !$omp private(i,beta,a,u,j,b) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = v_ovov(u,a,i,beta) + enddo + enddo + enddo + enddo + !$omp end do nowait + + do i = 1, nO + !$omp do + do a = 1, nV + do j = 1, nO + do b = 1, nV + X(b,j,a,i) = - v_vvoo(b,a,i,j) + enddo + enddo + enddo + !$omp end do nowait + enddo + + do j = 1, nO + !$omp do + do b = 1, nV + do beta = 1, nV + do u = 1, nO + Y(u,beta,b,j) = 0.5d0 * t2(u,j,b,beta) + t1(u,b) * t1(j,beta) + enddo + enddo + enddo + !$omp end do + enddo + !$omp end parallel + + call dgemm('N','N',nO*nV*nO,nV,nO, & + -1d0, v_ovoo, size(v_ovoo,1) * size(v_ovoo,2) * size(v_ovoo,3), & + t1 , size(t1,1), & + 1d0, K1 , size(K1,1) * size(K1,2) * size(K1,3)) + + double precision, allocatable :: K1tmp(:,:,:,:), t1v(:,:,:) + allocate(K1tmp(nO,nO,nV,nV), t1v(cholesky_mo_num,nO,nO)) + + call dgemm('N','T', cholesky_mo_num*nO, nO, nV, 1.d0, & + cc_space_v_ov_chol, cholesky_mo_num*nO, t1, nO, 0.d0, & + t1v, cholesky_mo_num*nO) + + call dgemm('T','N', nO*nO, nV*nV, cholesky_mo_num, 1.d0, & + t1v, cholesky_mo_num, cc_space_v_vv_chol, cholesky_mo_num, 0.d0, & + K1tmp, nO*nO) + + deallocate(t1v) + ! Y(u,beta,b,j) * X(b,j,a,i) = Z(u,beta,a,i) + call dgemm('N','N',nV*nO,nO*nV,nV*nO, & + 1d0, Y, size(Y,1) * size(Y,2), & + X, size(X,1) * size(X,2), & + 0d0, Z, size(Z,1) * size(Z,2)) + + !$omp parallel & + !$omp shared(nO,nV,K1,Z,K1tmp) & + !$omp private(i,beta,a,u) & + !$omp default(none) + !$omp do + do beta = 1, nV + do i = 1, nO + do a = 1, nV + do u = 1, nO + K1(u,a,i,beta) = K1(u,a,i,beta) + K1tmp(u,i,a,beta) + Z(u,beta,a,i) + enddo + enddo + enddo + enddo + !$omp end do + !$omp end parallel + + deallocate(K1tmp,X,Y,Z) + +end diff --git a/src/ccsd/ccsd_spin_orb_sub.irp.f b/src/ccsd/ccsd_spin_orb_sub.irp.f index a267cc45..09d6a0fe 100644 --- a/src/ccsd/ccsd_spin_orb_sub.irp.f +++ b/src/ccsd/ccsd_spin_orb_sub.irp.f @@ -241,7 +241,7 @@ subroutine run_ccsd_spin_orb call ccsd_energy_spin(nO,nV,t1,t2,F_ov,v_oovv,energy) call wall_time(tfi) - write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,1pE10.2,A3,1pE10.2,A2)') ' | ',nb_iter,' | ', & + write(*,'(A3,I6,A3,F18.12,A3,F16.12,A3,ES10.2,A3,ES10.2,A2)') ' | ',nb_iter,' | ', & uncorr_energy+energy,' | ', energy,' | ', max_r1,' | ', max_r2,' |' if (cc_dev) then print*,'Total:',tfi-tbi,'s' @@ -266,7 +266,7 @@ subroutine run_ccsd_spin_orb print*,'' write(*,'(A15,F18.12,A3)') ' E(CCSD) = ', uncorr_energy+energy, ' Ha' write(*,'(A15,F18.12,A3)') ' Correlation = ', energy, ' Ha' - write(*,'(A15,1pE10.2,A3)')' Conv = ', max_r + write(*,'(A15,ES10.2,A3)')' Conv = ', max_r print*,'' if (write_amplitudes) then diff --git a/src/ccsd/ccsd_t_space_orb_abc.irp.f b/src/ccsd/ccsd_t_space_orb_abc.irp.f index 1aab6bd7..12a71045 100644 --- a/src/ccsd/ccsd_t_space_orb_abc.irp.f +++ b/src/ccsd/ccsd_t_space_orb_abc.irp.f @@ -101,7 +101,7 @@ subroutine ccsd_par_t_space_v3(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energy) !$OMP PARALLEL PRIVATE(a,b,c,e) DEFAULT(SHARED) e = 0d0 - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do a = 1, nV do b = a+1, nV do c = b+1, nV diff --git a/src/ccsd/ccsd_t_space_orb_stoch.irp.f b/src/ccsd/ccsd_t_space_orb_stoch.irp.f index 31fe67ce..13fa4f1a 100644 --- a/src/ccsd/ccsd_t_space_orb_stoch.irp.f +++ b/src/ccsd/ccsd_t_space_orb_stoch.irp.f @@ -94,6 +94,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ enddo !$OMP END DO nowait + !$OMP BARRIER !$OMP END PARALLEL double precision, external :: ccsd_t_task_aba @@ -209,9 +210,9 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ Pabc(:) = 1.d0/Pabc(:) print '(A)', '' - print '(A)', ' +----------------------+--------------+----------+' - print '(A)', ' | E(CCSD(T)) | Error | % |' - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ==========' + print '(A)', ' E(CCSD(T)) Error % ' + print '(A)', ' ======================= ============== ==========' call wall_time(t00) @@ -256,7 +257,7 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ if (imin >= bounds(2,isample)) then cycle endif - ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc) + ieta = binary_search(waccu,(eta + dble(isample-1))/dble(nbuckets),Nabc)+1 if (sampled(ieta) == -1_8) then sampled(ieta) = 0_8 @@ -280,9 +281,10 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ call wall_time(t01) if ((t01-t00 > 1.0d0).or.(imin >= Nabc)) then - t00 = t01 !$OMP TASKWAIT + call wall_time(t01) + t00 = t01 double precision :: ET, ET2 double precision :: energy_stoch, energy_det @@ -322,17 +324,20 @@ subroutine ccsd_par_t_space_stoch(nO,nV,t1,t2,f_o,f_v,v_vvvo,v_vvoo,v_vooo,energ energy = energy_det + energy_stoch - print '('' | '',F20.8, '' | '', E12.4,'' | '', F8.2,'' |'')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) + print '('' '',F20.8, '' '', ES12.4,'' '', F8.2,'' '')', eccsd+energy, dsqrt(variance/(norm-1.d0)), 100.*real(Ncomputed)/real(Nabc) endif !$OMP END MASTER if (imin >= Nabc) exit enddo !$OMP END PARALLEL - print '(A)', ' +----------------------+--------------+----------+' + print '(A)', ' ======================= ============== ========== ' print '(A)', '' - deallocate(X_vovv,X_ooov,T_voov,T_oovv) + deallocate(X_vovv) + deallocate(X_ooov) + deallocate(T_voov) + deallocate(T_oovv) end diff --git a/src/cipsi/pt2_stoch_routines.irp.f b/src/cipsi/pt2_stoch_routines.irp.f index 7909007a..3b048c14 100644 --- a/src/cipsi/pt2_stoch_routines.irp.f +++ b/src/cipsi/pt2_stoch_routines.irp.f @@ -591,7 +591,7 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_ time-time0 ! Old print - !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,1pE16.6,1pE16.6)', c, & + !print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1,ES16.6,ES16.6)', c, & ! pt2_data % pt2(pt2_stoch_istate) +E, & ! pt2_data_err % pt2(pt2_stoch_istate), & ! pt2_data % variance(pt2_stoch_istate), & diff --git a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f index 4270e7b8..9bba162e 100644 --- a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f @@ -53,7 +53,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij ! HOTSPOT + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij ! HOTSPOT enddo end do !!!!!!!!!! @@ -72,7 +72,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji == (0.d0,0.d0)) cycle !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji ! HOTSPOT + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji ! HOTSPOT enddo end do end do @@ -109,7 +109,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end do @@ -128,7 +128,7 @@ subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, end if !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f index bc19e7e4..b2a38e02 100644 --- a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f @@ -76,7 +76,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -88,7 +88,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1) + tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,2) enddo endif end do @@ -114,7 +114,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -126,7 +126,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2) + tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,1) enddo endif end do @@ -169,7 +169,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -180,7 +180,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -211,7 +211,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -222,7 +222,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -265,7 +265,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,1) - hij_cache(putj,2) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do do putj=hfix+1,mo_num @@ -274,7 +274,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij_cache(putj,2) - hij_cache(putj,1) if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1) + tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,2) endif end do @@ -293,7 +293,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,1) - hji_cache(putj,2) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do do putj=hfix+1,mo_num @@ -302,7 +302,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji_cache(putj,2) - hji_cache(putj,1) if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int) - tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2) + tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,1) endif end do @@ -342,7 +342,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1) + tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,2) enddo endif end if @@ -353,7 +353,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hij /= 0.d0) then hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1) + tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,2) enddo endif end if @@ -385,7 +385,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2) + tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,1) enddo endif end if @@ -396,7 +396,7 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (hji /= 0.d0) then hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int) do k=1,N_states - tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2) + tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,1) enddo endif end if @@ -445,8 +445,8 @@ subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, do k=1,N_states ! take conjugate to get contribution to instead of ! mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij) - mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij - mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji + mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,2) * hij + mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,1) * hji enddo end do end do diff --git a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f index 0a08c808..d01ed433 100644 --- a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f +++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f @@ -79,12 +79,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if end do @@ -103,12 +103,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo else ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end do @@ -135,7 +135,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo endif end do @@ -154,7 +154,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo endif end do @@ -189,7 +189,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,2) * hij enddo end do end do @@ -210,7 +210,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,1) * hji enddo end do end do @@ -239,12 +239,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij + mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,2) * hij enddo endif end do @@ -262,12 +262,12 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, if (puti < putj) then !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo else !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji + mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,1) * hji enddo endif end do @@ -290,7 +290,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij + mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,2) * hij enddo end if !! @@ -299,7 +299,7 @@ subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int) !DIR$ LOOP COUNT AVG(4) do k=1,N_states - mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji + mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,1) * hji enddo end if end if diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 77377554..06cf848b 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -893,20 +893,45 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function enddo else if(debug_tc_pt2 == 2)then !! debugging the new version +! psi_h_alpha_tmp = 0.d0 +! alpha_h_psi_tmp = 0.d0 +! do iii = 1, N_det_selectors ! old version +! call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) +! call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) +! psi_h_alpha_tmp += i_h_alpha * psi_selectors_coef_tc(iii,1,1) ! left function +! alpha_h_psi_tmp += alpha_h_i * psi_selectors_coef_tc(iii,2,1) ! right function +! enddo psi_h_alpha_tmp = mat_l(istate, p1, p2) ! new version alpha_h_psi_tmp = mat_r(istate, p1, p2) ! new version psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 - do iii = 1, N_det_selectors ! old version - call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) - psi_h_alpha += i_h_alpha * psi_selectors_coef_tc(iii,2,1) ! left function - alpha_h_psi += alpha_h_i * psi_selectors_coef_tc(iii,1,1) ! right function + do iii = 1, N_det ! old version + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_det(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_det(1,1,iii), N_int, alpha_h_i) + psi_h_alpha += i_h_alpha * psi_l_coef_bi_ortho(iii,1) ! left function + alpha_h_psi += alpha_h_i * psi_r_coef_bi_ortho(iii,1) ! right function enddo if(dabs(psi_h_alpha*alpha_h_psi/delta_E).gt.1.d-10)then error = dabs(psi_h_alpha * alpha_h_psi - psi_h_alpha_tmp * alpha_h_psi_tmp)/dabs(psi_h_alpha * alpha_h_psi) if(error.gt.1.d-2)then + call debug_det(det, N_int) print*,'error =',error,psi_h_alpha * alpha_h_psi/delta_E,psi_h_alpha_tmp * alpha_h_psi_tmp/delta_E + print*,psi_h_alpha , alpha_h_psi + print*,psi_h_alpha_tmp , alpha_h_psi_tmp + print*,'selectors ' + do iii = 1, N_det_selectors ! old version + print*,'iii',iii,psi_selectors_coef_tc(iii,1,1),psi_selectors_coef_tc(iii,2,1) + call htilde_mu_mat_opt_bi_ortho_no_3e(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_opt_bi_ortho_no_3e(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + print*,i_h_alpha,alpha_h_i + call debug_det(psi_selectors(1,1,iii),N_int) + enddo +! print*,'psi_det ' +! do iii = 1, N_det! old version +! print*,'iii',iii,psi_l_coef_bi_ortho(iii,1),psi_r_coef_bi_ortho(iii,1) +! call debug_det(psi_det(1,1,iii),N_int) +! enddo + stop endif endif else diff --git a/src/cisd/EZFIO.cfg b/src/cisd/EZFIO.cfg index 4565d2df..688f802a 100644 --- a/src/cisd/EZFIO.cfg +++ b/src/cisd/EZFIO.cfg @@ -5,3 +5,11 @@ interface: ezfio size: (determinants.n_states) + +[lcc_energy] +type: double precision +doc: lccsd energy +interface: ezfio +size: (determinants.n_states) + + diff --git a/src/cisd/NEED b/src/cisd/NEED index d9ad3efc..616d021e 100644 --- a/src/cisd/NEED +++ b/src/cisd/NEED @@ -1,3 +1,4 @@ selectors_full single_ref_method davidson_undressed +dav_general_mat diff --git a/src/cisd/lccsd.irp.f b/src/cisd/lccsd.irp.f new file mode 100644 index 00000000..919c5aaa --- /dev/null +++ b/src/cisd/lccsd.irp.f @@ -0,0 +1,95 @@ +program lccsd + implicit none + BEGIN_DOC +! Linerarized CCSD +! + ! This program takes a reference Slater determinant of ROHF-like occupancy, + ! + ! and performs all single and double excitations on top of it, disregarding + ! spatial symmetry and compute the "n_states" lowest eigenstates of that CI + ! matrix (see :option:`determinants n_states`). + ! + ! This program can be useful in many cases: + ! + ! * **Ground state calculation**: if even after a :c:func:`cis` calculation, natural + ! orbitals (see :c:func:`save_natorb`) and then :c:func:`scf` optimization, you are not sure to have the lowest scf + ! solution, + ! do the same strategy with the :c:func:`cisd` executable instead of the :c:func:`cis` exectuable to generate the natural + ! orbitals as a guess for the :c:func:`scf`. + ! + ! + ! + ! * **Excited states calculations**: the lowest excited states are much likely to + ! be dominanted by single- or double-excitations. + ! Therefore, running a :c:func:`cisd` will save the "n_states" lowest states within + ! the CISD space + ! in the |EZFIO| directory, which can afterward be used as guess wave functions + ! for a further multi-state fci calculation if you specify "read_wf" = True + ! before running the fci executable (see :option:`determinants read_wf`). + ! Also, if you specify "s2_eig" = True, the cisd will only retain states + ! having the good value :math:`S^2` value + ! (see :option:`determinants expected_s2` and :option:`determinants s2_eig`). + ! If "s2_eig" = False, it will take the lowest n_states, whatever + ! multiplicity they are. + ! + ! + ! + ! Note: if you would like to discard some orbitals, use + ! :ref:`qp_set_mo_class` to specify: + ! + ! * "core" orbitals which will be always doubly occupied + ! + ! * "act" orbitals where an electron can be either excited from or to + ! + ! * "del" orbitals which will be never occupied + ! + END_DOC + PROVIDE N_states + read_wf = .False. + TOUCH read_wf + call run +end + +subroutine run + implicit none + + if(pseudo_sym)then + call H_apply_cisd_sym + else + call H_apply_cisd + endif + call get_lccsd_2 +end + +subroutine get_lccsd_2 + implicit none + integer :: i,k + double precision :: cisdq(N_states), delta_e + double precision,external :: diag_h_mat_elem + psi_coef = lccsd_coef + SOFT_TOUCH psi_coef + call save_wavefunction_truncated(save_threshold) + call ezfio_set_cisd_lcc_energy(lccsd_energies) + + print *, 'N_det = ', N_det + print*,'' + print*,'******************************' + print *, 'LCCSD Energies' + do i = 1,N_states + print *, i, lccsd_energies(i) + enddo + if (N_states > 1) then + print*,'******************************' + print*,'Excitation energies (au) (LCCSD)' + do i = 2, N_states + print*, i ,lccsd_energies(i) - lccsd_energies(1) + enddo + print*,'' + print*,'******************************' + print*,'Excitation energies (eV) (LCCSD)' + do i = 2, N_states + print*, i ,(lccsd_energies(i) - lccsd_energies(1)) * ha_to_ev + enddo + endif + +end diff --git a/src/cisd/lccsd_prov.irp.f b/src/cisd/lccsd_prov.irp.f new file mode 100644 index 00000000..8338cf81 --- /dev/null +++ b/src/cisd/lccsd_prov.irp.f @@ -0,0 +1,50 @@ + BEGIN_PROVIDER [ double precision, lccsd_coef, (N_det, N_states)] +&BEGIN_PROVIDER [ double precision, lccsd_energies, (N_states)] + implicit none + double precision, allocatable :: Dress_jj(:), H_jj(:), u_in(:,:) + double precision :: ebefore, eafter, ecorr, thresh + integer :: i,it,degree + logical :: converged + external H_u_0_nstates_openmp + allocate(Dress_jj(N_det),H_jj(N_det),u_in(N_det,N_states_diag)) + thresh = 1.d-6 + converged = .False. + Dress_jj = 0.d0 + u_in = 0.d0 + it = 0 + ! initial guess + do i = 1, N_states_diag + u_in(i,i) = 1.d0 + enddo + do i = 1,N_det + call i_H_j(psi_det(1,1,i),psi_det(1,1,i),N_int,H_jj(i)) + enddo + ebefore = H_jj(1) + do while (.not.converged) + it += 1 + print*,'N_det = ',N_det + call davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,lccsd_energies,& + N_det,N_states,N_states_diag,converged,H_u_0_nstates_openmp) + ecorr = lccsd_energies(1) - H_jj(1) + print*,'---------------------' + print*,'it = ',it + print*,'ecorr = ',ecorr + Dress_jj(1) = 0.d0 + do i = 2, N_det + if(ecorr + H_jj(i) .lt. H_jj(1))then + print*,'Warning, some dets are not dressed: ' + call get_excitation_degree(ref_bitmask,psi_det(1,1,i),degree,N_int) + print*,'degree, Delta E, coef', degree, H_jj(i)-H_jj(1), u_in(i,1)/u_in(1,1) + else + Dress_jj(i) = ecorr + endif + enddo + eafter = lccsd_energies(1) + converged = (dabs(eafter - ebefore).lt.thresh) + ebefore = eafter + enddo + do i = 1, N_states + lccsd_coef(1:N_det,i) = u_in(1:N_det,i) + enddo + +END_PROVIDER diff --git a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f index 73608720..0dc939cb 100644 --- a/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_diag_dressed_ext_rout.irp.f @@ -331,7 +331,7 @@ subroutine davidson_general_ext_rout_diag_dressed(u_in,H_jj,Dress_jj,energies,sz !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f index e59d21d1..24f4fa10 100644 --- a/src/dav_general_mat/dav_double_dress_ext_rout.irp.f +++ b/src/dav_general_mat/dav_double_dress_ext_rout.irp.f @@ -405,7 +405,7 @@ subroutine dav_double_dressed(u_in,H_jj,Dress_jj,Dressing_vec,idx_dress,energies !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_dressed_ext_rout.irp.f b/src/dav_general_mat/dav_dressed_ext_rout.irp.f index c045aa1a..cedaaf0a 100644 --- a/src/dav_general_mat/dav_dressed_ext_rout.irp.f +++ b/src/dav_general_mat/dav_dressed_ext_rout.irp.f @@ -398,7 +398,7 @@ subroutine davidson_general_ext_rout_dressed(u_in,H_jj,energies,sze,N_st,N_st_di !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_ext_rout.irp.f b/src/dav_general_mat/dav_ext_rout.irp.f index 2621e3a9..deb7e3a9 100644 --- a/src/dav_general_mat/dav_ext_rout.irp.f +++ b/src/dav_general_mat/dav_ext_rout.irp.f @@ -316,7 +316,7 @@ subroutine davidson_general_ext_rout(u_in,H_jj,energies,sze,N_st,N_st_diag_in,co !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/dav_general_mat/dav_general.irp.f b/src/dav_general_mat/dav_general.irp.f index cd9124e6..9940bf1e 100644 --- a/src/dav_general_mat/dav_general.irp.f +++ b/src/dav_general_mat/dav_general.irp.f @@ -327,7 +327,7 @@ subroutine davidson_general(u_in,H_jj,energies,dim_in,sze,N_st,N_st_diag_in,conv !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_h_dressed.irp.f b/src/davidson/diagonalization_h_dressed.irp.f index 26853df9..b7179c18 100644 --- a/src/davidson/diagonalization_h_dressed.irp.f +++ b/src/davidson/diagonalization_h_dressed.irp.f @@ -457,7 +457,7 @@ subroutine davidson_diag_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,N_st,N_st_dia !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f index 0c3c6f92..fa8aff80 100644 --- a/src/davidson/diagonalization_hcsf_dressed.irp.f +++ b/src/davidson/diagonalization_hcsf_dressed.irp.f @@ -477,7 +477,7 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,E11.3))') iter-1, to_print(1:2,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 45258c1c..7b559925 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -611,7 +611,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ !don't print continue else - write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,E11.3))') iter-1, to_print(1:3,1:N_st) + write(*,'(1X,I3,1X,100(1X,F16.10,1X,F11.6,1X,ES11.3))') iter-1, to_print(1:3,1:N_st) endif ! Check convergence diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f index 3ff060a6..96ca84ab 100644 --- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -436,7 +436,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, !don't print continue else - write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, ES11.3))') iter-1, to_print(1:2,1:N_st) endif ! Check convergence diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f index fed2ba9b..7ca2d203 100644 --- a/src/davidson_keywords/usef.irp.f +++ b/src/davidson_keywords/usef.irp.f @@ -13,7 +13,9 @@ BEGIN_PROVIDER [ integer, nthreads_davidson ] character*(32) :: env call getenv('QP_NTHREADS_DAVIDSON',env) if (trim(env) /= '') then + call lock_io read(env,*) nthreads_davidson + call unlock_io call write_int(6,nthreads_davidson,'Target number of threads for ') endif END_PROVIDER diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f index 1a1d92b5..ce4d96c2 100644 --- a/src/determinants/density_matrix.irp.f +++ b/src/determinants/density_matrix.irp.f @@ -117,7 +117,7 @@ END_PROVIDER !$OMP N_det_alpha_unique,N_det_beta_unique,irp_here) allocate(tmp_a(mo_num,mo_num,N_states), tmp_b(mo_num,mo_num,N_states) ) tmp_a = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_a=1,N_det krow = psi_bilinear_matrix_rows(k_a) ASSERT (krow <= N_det_alpha_unique) @@ -173,7 +173,7 @@ END_PROVIDER deallocate(tmp_a) tmp_b = 0.d0 - !$OMP DO SCHEDULE(dynamic,64) + !$OMP DO SCHEDULE(guided) do k_b=1,N_det krow = psi_bilinear_matrix_transp_rows(k_b) ASSERT (krow <= N_det_alpha_unique) diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index 06fca0cd..e445c56b 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -66,9 +66,9 @@ END_PROVIDER write(*,'(i16)',advance='no') i end do write(*,*) '' - write(*,'(A17,100(1pE16.8))') 'x_dipole_moment = ',x_dipole_moment - write(*,'(A17,100(1pE16.8))') 'y_dipole_moment = ',y_dipole_moment - write(*,'(A17,100(1pE16.8))') 'z_dipole_moment = ',z_dipole_moment + write(*,'(A17,100(ES16.8))') 'x_dipole_moment = ',x_dipole_moment + write(*,'(A17,100(ES16.8))') 'y_dipole_moment = ',y_dipole_moment + write(*,'(A17,100(ES16.8))') 'z_dipole_moment = ',z_dipole_moment !print*, 'x_dipole_moment = ',x_dipole_moment !print*, 'y_dipole_moment = ',y_dipole_moment !print*, 'z_dipole_moment = ',z_dipole_moment diff --git a/src/determinants/h_apply.irp.f b/src/determinants/h_apply.irp.f index 078c2104..65f1a832 100644 --- a/src/determinants/h_apply.irp.f +++ b/src/determinants/h_apply.irp.f @@ -250,7 +250,7 @@ subroutine remove_duplicates_in_psi_det(found_duplicates) enddo !$OMP END DO - !$OMP DO schedule(dynamic,1024) + !$OMP DO schedule(guided,64) do i=1,N_det-1 if (duplicate(i)) then cycle diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f index 2c1a8757..6dc49526 100644 --- a/src/determinants/s2.irp.f +++ b/src/determinants/s2.irp.f @@ -317,7 +317,7 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst !$OMP SHARED (ll,jj,psi_keys_tmp,psi_coefs_tmp,N_int,n,nstates)& !$OMP REDUCTION(+:accu) allocate(idx(0:n)) - !$OMP DO SCHEDULE(dynamic) + !$OMP DO SCHEDULE(guided) do i = n,1,-1 ! Better OMP scheduling call get_s2(psi_keys_tmp(1,1,i),psi_keys_tmp(1,1,i),N_int,s2_tmp) accu += psi_coefs_tmp(i,ll) * s2_tmp * psi_coefs_tmp(i,jj) diff --git a/src/ezfio_files/NEED b/src/ezfio_files/NEED index d06d604c..1766924f 100644 --- a/src/ezfio_files/NEED +++ b/src/ezfio_files/NEED @@ -1,2 +1,3 @@ mpi zmq +utils diff --git a/src/ezfio_files/ezfio.irp.f b/src/ezfio_files/ezfio.irp.f index 4f53b173..7e414a04 100644 --- a/src/ezfio_files/ezfio.irp.f +++ b/src/ezfio_files/ezfio.irp.f @@ -5,7 +5,9 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] ! variable if it is set, or as the 1st argument of the command line. END_DOC - PROVIDE mpi_initialized + PROVIDE mpi_initialized output_wall_time_0 + + integer :: i ! Get the QPACKAGE_INPUT environment variable call getenv('QPACKAGE_INPUT',ezfio_filename) @@ -44,11 +46,14 @@ BEGIN_PROVIDER [ character*(1024), ezfio_filename ] END_PROVIDER BEGIN_PROVIDER [ character*(1024), ezfio_work_dir ] + use c_functions implicit none BEGIN_DOC ! EZFIO/work/ END_DOC - call ezfio_set_work_empty(.False.) + logical :: b + b = mkl_serv_intel_cpu_true() /= 1 + call ezfio_set_work_empty(b) ezfio_work_dir = trim(ezfio_filename)//'/work/' END_PROVIDER diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f index 4d3de7d0..7f93ae55 100644 --- a/src/fci_tc_bi/selectors.irp.f +++ b/src/fci_tc_bi/selectors.irp.f @@ -27,6 +27,8 @@ END_PROVIDER implicit none BEGIN_DOC ! Determinants on which we apply for perturbation. + ! psi_selectors_coef_tc(iii,1,istate) = left coefficient of the iii determinant + ! psi_selectors_coef_tc(iii,2,istate) = right coefficient of the iii determinant END_DOC integer :: i,k diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f index 8c6658c5..a5ab6a60 100644 --- a/src/hartree_fock/fock_matrix_hf.irp.f +++ b/src/hartree_fock/fock_matrix_hf.irp.f @@ -190,47 +190,75 @@ END_PROVIDER deallocate(X) - ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + if (elec_alpha_num > elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif - allocate(X2(ao_num,ao_num,cholesky_ao_num,2)) + double precision :: rss + double precision :: memory_of_double + integer :: iblock + integer, parameter :: block_size = 32 + + rss = memory_of_double(ao_num*ao_num) + call check_mem(2.d0*block_size*rss, irp_here) + allocate(X2(ao_num,ao_num,block_size,2)) + allocate(X3(ao_num,block_size,ao_num,2)) + ! ao_two_e_integral_alpha_chol (l,s) -= cholesky_ao(l,m,j) * SCF_density_matrix_ao_beta (m,n) * cholesky_ao(n,s,j) - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_alpha, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,1), ao_num) + do iblock=1,cholesky_ao_num,block_size - call dgemm('N','N',ao_num,ao_num*cholesky_ao_num,ao_num, 1.d0, & - SCF_density_matrix_ao_beta, ao_num, & - cholesky_ao, ao_num, 0.d0, & - X2(1,1,1,2), ao_num) + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_alpha, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,1), ao_num) - allocate(X3(ao_num,cholesky_ao_num,ao_num,2)) + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size),ao_num, 1.d0, & + SCF_density_matrix_ao_beta, ao_num, & + cholesky_ao(1,1,iblock), ao_num, 0.d0, & + X2(1,1,1,2), ao_num) + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + X3(m,j,s,2) = X2(m,s,j,2) + enddo + enddo + enddo + + else + + do s=1,ao_num + do j=1,min(cholesky_ao_num-iblock+1,block_size) + do m=1,ao_num + X3(m,j,s,1) = X2(m,s,j,1) + enddo + enddo + enddo + endif + + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,1), ao_num*block_size, 1.d0, & + ao_two_e_integral_alpha_chol, ao_num) + + if (elec_alpha_num > elec_beta_num) then + call dgemm('N','N',ao_num,ao_num,ao_num*min(cholesky_ao_num-iblock+1,block_size), -1.d0, & + cholesky_ao(1,1,iblock), ao_num, & + X3(1,1,1,2), ao_num*block_size, 1.d0, & + ao_two_e_integral_beta_chol, ao_num) + endif - do s=1,ao_num - do j=1,cholesky_ao_num - do m=1,ao_num - X3(m,j,s,1) = X2(m,s,j,1) - X3(m,j,s,2) = X2(m,s,j,2) - enddo - enddo enddo - deallocate(X2) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,1), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_alpha_chol, ao_num) - - call dgemm('N','N',ao_num,ao_num,ao_num*cholesky_ao_num, -1.d0, & - cholesky_ao, ao_num, & - X3(1,1,1,2), ao_num*cholesky_ao_num, 1.d0, & - ao_two_e_integral_beta_chol, ao_num) - - deallocate(X3) + if (elec_alpha_num == elec_beta_num) then + ao_two_e_integral_beta_chol = ao_two_e_integral_alpha_chol + endif + deallocate(X2,X3) END_PROVIDER diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg index e6aa2d67..078da3a2 100644 --- a/src/mo_optimization/EZFIO.cfg +++ b/src/mo_optimization/EZFIO.cfg @@ -2,7 +2,7 @@ type: character*(32) doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian interface: ezfio,provider,ocaml -default: full +default: diag [n_det_max_opt] type: integer @@ -14,7 +14,7 @@ default: 200000 type: integer doc: Maximal number of iterations for the orbital optimization interface: ezfio,provider,ocaml -default: 20 +default: 10 [thresh_opt_max_elem_grad] type: double precision diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f similarity index 96% rename from src/mo_optimization/optimization.irp.f rename to src/mo_optimization/cipsi_orb_opt.irp.f index 9892b3e3..ae3aa1bf 100644 --- a/src/mo_optimization/optimization.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -15,7 +15,7 @@ subroutine run_optimization logical :: not_converged character (len=100) :: filename - PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals not_converged = .True. nb_iter = 0 diff --git a/src/mo_optimization/first_gradient_opt.irp.f b/src/mo_optimization/first_gradient_opt.irp.f index d6918a00..f08b9d1f 100644 --- a/src/mo_optimization/first_gradient_opt.irp.f +++ b/src/mo_optimization/first_gradient_opt.irp.f @@ -111,7 +111,7 @@ subroutine first_gradient_opt(n,v_grad) if (debug) then print*,'Matrix containing the gradient :' do i = 1, mo_num - write(*,'(100(E12.5))') A(i,1:mo_num) + write(*,'(100(ES12.5))') A(i,1:mo_num) enddo endif diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f index 2cd063da..05aec18a 100644 --- a/src/mo_optimization/state_average_energy.irp.f +++ b/src/mo_optimization/state_average_energy.irp.f @@ -39,17 +39,24 @@ subroutine state_average_energy(energy) double precision :: get_two_e_integral double precision :: mono_e, bi_e integer :: i,j,k,l - + + energy = nuclear_repulsion ! mono electronic part + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i,j,k,l,mono_e, bi_e) & + !$OMP SHARED(mo_num, mo_integrals_map, two_e_dm_mo, one_e_dm_mo, energy, & + !$OMP mo_one_e_integrals) mono_e = 0d0 + !$OMP DO do j = 1, mo_num do i = 1, mo_num mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) enddo enddo + !$OMP END DO NOWAIT ! bi electronic part bi_e = 0d0 + !$OMP DO do l = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -59,13 +66,17 @@ subroutine state_average_energy(energy) enddo enddo enddo + !$OMP END DO ! State average energy - energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + !$OMP CRITICAL + energy = energy + mono_e + 0.5d0 * bi_e + !$OMP END CRITICAL + !$OMP END PARALLEL ! Check !call print_energy_components - + print*,'State average energy:', energy !print*,ci_energy diff --git a/src/mo_two_e_ints/cholesky.irp.f b/src/mo_two_e_ints/cholesky.irp.f index 32c0dccd..349f13b9 100644 --- a/src/mo_two_e_ints/cholesky.irp.f +++ b/src/mo_two_e_ints/cholesky.irp.f @@ -1,49 +1,51 @@ -BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_ao_num) ] +BEGIN_PROVIDER [ integer, cholesky_mo_num ] implicit none BEGIN_DOC - ! Cholesky vectors in MO basis + ! Number of Cholesky vectors in MO basis END_DOC - - integer :: k - - call set_multiple_levels_omp(.False.) - print *, 'AO->MO Transformation of Cholesky vectors' - !$OMP PARALLEL DO PRIVATE(k) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,cholesky_mo(1,1,k),mo_num) - enddo - !$OMP END PARALLEL DO - print *, '' - + cholesky_mo_num = cholesky_ao_num END_PROVIDER -BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_ao_num, mo_num, mo_num) ] +BEGIN_PROVIDER [ double precision, cholesky_mo, (mo_num, mo_num, cholesky_mo_num) ] implicit none BEGIN_DOC ! Cholesky vectors in MO basis END_DOC - integer :: i,j,k - double precision, allocatable :: buffer(:,:) - - print *, 'AO->MO Transformation of Cholesky vectors .' + integer :: k, i, j call set_multiple_levels_omp(.False.) - !$OMP PARALLEL PRIVATE(i,j,k,buffer) - allocate(buffer(mo_num,mo_num)) - !$OMP DO SCHEDULE(static) - do k=1,cholesky_ao_num - call ao_to_mo(cholesky_ao(1,1,k),ao_num,buffer,mo_num) + !$OMP PARALLEL DO PRIVATE(k) + do k=1,cholesky_mo_num do j=1,mo_num do i=1,mo_num - cholesky_mo_transp(k,i,j) = buffer(i,j) + cholesky_mo(i,j,k) = cholesky_mo_transp(k,i,j) enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL - print *, '' + !$OMP END PARALLEL DO + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, cholesky_mo_transp, (cholesky_mo_num, mo_num, mo_num) ] + implicit none + BEGIN_DOC + ! Cholesky vectors in MO basis + END_DOC + + double precision, allocatable :: X(:,:,:) + integer :: ierr + print *, 'AO->MO Transformation of Cholesky vectors' + + allocate(X(mo_num,cholesky_mo_num,ao_num), stat=ierr) + if (ierr /= 0) then + print *, irp_here, ': Allocation failed' + endif + call dgemm('T','N', ao_num*cholesky_mo_num, mo_num, ao_num, 1.d0, & + cholesky_ao, ao_num, mo_coef, ao_num, 0.d0, X, ao_num*cholesky_mo_num) + call dgemm('T','N', cholesky_mo_num*mo_num, mo_num, ao_num, 1.d0, & + X, ao_num, mo_coef, ao_num, 0.d0, cholesky_mo_transp, cholesky_mo_num*mo_num) + deallocate(X) END_PROVIDER diff --git a/src/mo_two_e_ints/integrals_3_index.irp.f b/src/mo_two_e_ints/integrals_3_index.irp.f index d807f619..eb05da84 100644 --- a/src/mo_two_e_ints/integrals_3_index.irp.f +++ b/src/mo_two_e_ints/integrals_3_index.irp.f @@ -13,14 +13,14 @@ if (do_ao_cholesky) then double precision, allocatable :: buffer_jj(:,:), buffer(:,:,:) - allocate(buffer_jj(cholesky_ao_num,mo_num), buffer(mo_num,mo_num,mo_num)) + allocate(buffer_jj(cholesky_mo_num,mo_num), buffer(mo_num,mo_num,mo_num)) do j=1,mo_num buffer_jj(:,j) = cholesky_mo_transp(:,j,j) enddo - call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - buffer_jj, cholesky_ao_num, 0.d0, & + call dgemm('T','N', mo_num*mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + buffer_jj, cholesky_mo_num, 0.d0, & buffer, mo_num*mo_num) do k = 1, mo_num @@ -36,9 +36,9 @@ do j = 1, mo_num - call dgemm('T','N',mo_num,mo_num,cholesky_ao_num, 1.d0, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, & - cholesky_mo_transp(1,1,j), cholesky_ao_num, 0.d0, & + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num, 1.d0, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, & + cholesky_mo_transp(1,1,j), cholesky_mo_num, 0.d0, & buffer_jj, mo_num) do k=1,mo_num diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index a461504e..0e77b6a2 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -37,7 +37,9 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] call map_load_from_disk(trim(ezfio_filename)//'/work/mo_ints',mo_integrals_map) print*, 'MO integrals provided' return - else + endif + + if (.not. do_direct_integrals) then PROVIDE ao_two_e_integrals_in_map endif @@ -90,6 +92,10 @@ subroutine four_idx_dgemm double precision, allocatable :: a1(:,:,:,:) double precision, allocatable :: a2(:,:,:,:) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif + allocate (a1(ao_num,ao_num,ao_num,ao_num)) print *, 'Getting AOs' @@ -103,6 +109,7 @@ subroutine four_idx_dgemm enddo !$OMP END PARALLEL DO + print *, '1st transformation' ! 1st transformation allocate (a2(ao_num,ao_num,ao_num,mo_num)) @@ -166,11 +173,9 @@ subroutine four_idx_dgemm deallocate (a1) + call map_sort(mo_integrals_map) call map_unique(mo_integrals_map) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - end subroutine subroutine add_integrals_to_map(mask_ijkl) @@ -250,7 +255,7 @@ subroutine add_integrals_to_map(mask_ijkl) call wall_time(wall_1) - size_buffer = min(ao_num*ao_num*ao_num,8000000) + size_buffer = min(ao_num*ao_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' @@ -443,11 +448,6 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END PARALLEL call map_merge(mo_integrals_map) - call wall_time(wall_2) - call cpu_time(cpu_2) - integer*8 :: get_mo_map_size, mo_map_size - mo_map_size = get_mo_map_size() - deallocate(list_ijkl) @@ -465,51 +465,53 @@ subroutine add_integrals_to_map_cholesky integer :: size_buffer, n_integrals size_buffer = min(mo_num*mo_num*mo_num,16000000) - double precision, allocatable :: Vtmp(:,:,:,:) + double precision, allocatable :: Vtmp(:,:,:) integer(key_kind) , allocatable :: buffer_i(:) real(integral_kind), allocatable :: buffer_value(:) - if (.True.) then - ! In-memory transformation + call set_multiple_levels_omp(.False.) - allocate (Vtmp(mo_num,mo_num,mo_num,mo_num)) + !$OMP PARALLEL DEFAULT(SHARED) & + !$OMP PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i, Vtmp) + allocate (buffer_i(size_buffer), buffer_value(size_buffer)) + allocate (Vtmp(mo_num,mo_num,mo_num)) + n_integrals = 0 - call dgemm('N','T',mo_num*mo_num,mo_num*mo_num,cholesky_ao_num,1.d0, & - cholesky_mo, mo_num*mo_num, & - cholesky_mo, mo_num*mo_num, 0.d0, & + !$OMP DO SCHEDULE(dynamic) + do l=1,mo_num + call dgemm('T','N',mo_num*mo_num,mo_num,cholesky_mo_num,1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,l), cholesky_mo_num, 0.d0, & Vtmp, mo_num*mo_num) - !$OMP PARALLEL PRIVATE(i,j,k,l,n_integrals,buffer_value, buffer_i) - allocate (buffer_i(size_buffer), buffer_value(size_buffer)) - n_integrals = 0 - !$OMP DO - do l=1,mo_num - do k=1,l - do j=1,mo_num - do i=1,j - if (abs(Vtmp(i,j,k,l)) > mo_integrals_threshold) then - n_integrals += 1 - buffer_value(n_integrals) = Vtmp(i,j,k,l) - !DIR$ FORCEINLINE - call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) - if (n_integrals == size_buffer) then - call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - n_integrals = 0 - endif + do k=1,l + do j=1,mo_num + do i=1,j + if (dabs(Vtmp(i,j,k)) > mo_integrals_threshold) then + n_integrals = n_integrals + 1 + buffer_value(n_integrals) = Vtmp(i,j,k) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,k,j,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 endif - enddo + endif enddo enddo enddo - !$OMP END DO + enddo + !$OMP END DO NOWAIT + + if (n_integrals > 0) then call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) - deallocate(buffer_i, buffer_value) - !$OMP END PARALLEL - - deallocate(Vtmp) - call map_unique(mo_integrals_map) - endif + deallocate(buffer_i, buffer_value, Vtmp) + !$OMP BARRIER + !$OMP END PARALLEL + + call map_sort(mo_integrals_map) + call map_unique(mo_integrals_map) end @@ -580,6 +582,9 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) return endif + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -855,6 +860,9 @@ subroutine add_integrals_to_map_no_exit_34(mask_ijkl) call bitstring_to_list( mask_ijkl(1,3), list_ijkl(1,3), n_k, N_int ) call bitstring_to_list( mask_ijkl(1,4), list_ijkl(1,4), n_l, N_int ) + if (ao_num > 1289) then + print *, irp_here, ': Integer overflow in ao_num**3' + endif size_buffer = min(ao_num*ao_num*ao_num,16000000) print*, 'Providing the molecular integrals ' print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& @@ -1350,16 +1358,29 @@ END_PROVIDER ! mo_two_e_integrals_jj_anti(i,j) = J_ij - K_ij END_DOC - integer :: i,j + integer :: i,j,k double precision :: get_two_e_integral if (do_ao_cholesky) then + double precision, allocatable :: buffer(:,:) + allocate (buffer(cholesky_mo_num,mo_num)) + do k=1,cholesky_mo_num + do i=1,mo_num + buffer(k,i) = cholesky_mo_transp(k,i,i) + enddo + enddo + call dgemm('T','N',mo_num,mo_num,cholesky_mo_num,1.d0, & + buffer, cholesky_mo_num, buffer, cholesky_mo_num, 0.d0, mo_two_e_integrals_jj, mo_num) + deallocate(buffer) + do j=1,mo_num do i=1,mo_num - !TODO: use dgemm - mo_two_e_integrals_jj(i,j) = sum(cholesky_mo_transp(:,i,i)*cholesky_mo_transp(:,j,j)) - mo_two_e_integrals_jj_exchange(i,j) = sum(cholesky_mo_transp(:,i,j)*cholesky_mo_transp(:,j,i)) + mo_two_e_integrals_jj_exchange(i,j) = 0.d0 + do k=1,cholesky_mo_num + mo_two_e_integrals_jj_exchange(i,j) = mo_two_e_integrals_jj_exchange(i,j) + & + cholesky_mo_transp(k,i,j)*cholesky_mo_transp(k,j,i) + enddo enddo enddo diff --git a/src/tc_bi_ortho/h_mat_triple.irp.f b/src/tc_bi_ortho/h_mat_triple.irp.f new file mode 100644 index 00000000..4c8c107a --- /dev/null +++ b/src/tc_bi_ortho/h_mat_triple.irp.f @@ -0,0 +1,391 @@ +subroutine get_excitation_general(key_i,key_j, Nint,degree_array,holes_array, particles_array,phase) + use bitmasks + BEGIN_DOC +! returns the array, for each spin, of holes/particles between key_i and key_j +! +! with the following convention: a^+_{particle} a_{hole}|key_i> = |key_j> + END_DOC + include 'utils/constants.include.F' + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision, intent(out) :: phase + integer :: ispin,k,i,pos + integer(bit_kind) :: key_hole, key_particle + integer(bit_kind) :: xorvec(N_int_max,2) + holes_array = -1 + particles_array = -1 + degree_array = 0 + do i = 1, N_int + xorvec(i,1) = xor( key_i(i,1), key_j(i,1)) + xorvec(i,2) = xor( key_i(i,2), key_j(i,2)) + degree_array(1) += popcnt(xorvec(i,1)) + degree_array(2) += popcnt(xorvec(i,2)) + enddo + degree_array(1) = shiftr(degree_array(1),1) + degree_array(2) = shiftr(degree_array(2),1) + + do ispin = 1, 2 + k = 1 + !!! GETTING THE HOLES + do i = 1, N_int + key_hole = iand(xorvec(i,ispin),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + do ispin = 1, 2 + k = 1 + !!! GETTING THE PARTICLES + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_excitation_general ' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo + integer :: h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree_array(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine get_holes_general(key_i, key_j,Nint, holes_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of holes between key_i and key_j +! +! with the following convention: a_{hole}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: holes_array(100,2) + integer(bit_kind) :: key_hole + integer :: ispin,k,i,pos + holes_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_hole = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_i(i,ispin)) + do while(key_hole .ne.0_bit_kind) + pos = trailz(key_hole) + holes_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_hole = ibclr(key_hole,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_particles_general(key_i, key_j,Nint,particles_array) + use bitmasks + BEGIN_DOC +! returns the array, per spin, of particles between key_i and key_j +! +! with the following convention: a^dagger_{particle}|key_i> --> |key_j> + END_DOC + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2) + integer, intent(out) :: particles_array(100,2) + integer(bit_kind) :: key_particle + integer :: ispin,k,i,pos + particles_array = -1 + do ispin = 1, 2 + k = 1 + do i = 1, N_int + key_particle = iand(xor(key_i(i,ispin),key_j(i,ispin)),key_j(i,ispin)) + do while(key_particle .ne.0_bit_kind) + pos = trailz(key_particle) + particles_array(k,ispin) = 1+ bit_kind_size * (i-1) + pos + key_particle = ibclr(key_particle,pos) + k += 1 + if(k .gt.100)then + print*,'WARNING in get_holes_general' + print*,'More than a 100-th excitation for spin ',ispin + print*,'Those are the two determinants' + call debug_det(key_i, N_int) + call debug_det(key_j, N_int) + print*,'stoping ...' + stop + endif + enddo + enddo + enddo +end + +subroutine get_phase_general(key_i,Nint,degree, holes_array, particles_array,phase) + implicit none + integer, intent(in) :: degree(2), Nint + integer(bit_kind), intent(in) :: key_i(Nint,2) + integer, intent(in) :: holes_array(100,2),particles_array(100,2) + double precision, intent(out) :: phase + integer :: i,ispin,h,p, i_ok + integer(bit_kind), allocatable :: det_i(:,:),det_ip(:,:) + integer :: exc(0:2,2,2) + double precision :: phase_tmp + allocate(det_i(Nint,2),det_ip(N_int,2)) + det_i = key_i + phase = 1.d0 + do ispin = 1, 2 + do i = 1, degree(ispin) + h = holes_array(i,ispin) + p = particles_array(i,ispin) + det_ip = det_i + call do_single_excitation(det_ip,h,p,ispin,i_ok) + if(i_ok == -1)then + print*,'excitation was not possible ' + stop + endif + call get_single_excitation(det_i,det_ip,exc,phase_tmp,Nint) + phase *= phase_tmp + det_i = det_ip + enddo + enddo + +end + +subroutine H_tc_s2_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = H^TC | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), psi_det(1,1,j), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- + +subroutine H_tc_s2_dagger_u_0_with_pure_three(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo +end + +subroutine H_tc_s2_dagger_u_0_with_pure_three_omp(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC + ! Computes $v_0 = (H^TC)^dagger | u_0\rangle$ WITH PURE TRIPLE EXCITATION TERMS + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + call H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + integer :: i,j,degree,ist + double precision :: hmono, htwoe, hthree, htot + !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & + !$OMP SHARED(N_st, N_det, N_int, psi_det, u_0, v_0) & + !$OMP PRIVATE(ist, i, j, degree, hmono, htwoe, hthree,htot) + do i = 1, N_det + do j = 1, N_det + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree .ne. 3)cycle + call triple_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, hthree, htot) + do ist = 1, N_st + v_0(i,ist) += htot * u_0(j,ist) + enddo + enddo + enddo + !$OMP END PARALLEL DO +end + +! --- +subroutine triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + use bitmasks + BEGIN_DOC +! for triple excitation +!! +!! WARNING !! +! +! Genuine triple excitations of the same spin are not yet implemented + END_DOC + implicit none + integer(bit_kind), intent(in) :: key_j(N_int,2),key_i(N_int,2) + integer, intent(in) :: Nint + double precision, intent(out) :: hmono, htwoe, hthree, htot + integer :: degree + integer :: h1, p1, h2, p2, s1, s2, h3, p3, s3 + integer :: holes_array(100,2),particles_array(100,2),degree_array(2) + double precision :: phase,sym_3_e_int_from_6_idx_tensor + + hmono = 0.d0 + htwoe = 0.d0 + hthree = 0.d0 + htot = 0.d0 + call get_excitation_general(key_j, key_i, Nint,degree_array,holes_array, particles_array,phase) + degree = degree_array(1) + degree_array(2) + if(degree .ne. 3)return + if(degree_array(1)==3.or.degree_array(2)==3)then + if(degree_array(1) == 3)then + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(3,1) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(3,1) + else + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(3,2) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(3,2) + endif + hthree = sym_3_e_int_from_6_idx_tensor(p3, p2, p1, h3, h2, h1) + else + if(degree_array(1) == 2.and.degree_array(2) == 1)then ! double alpha + single beta + h1 = holes_array(1,1) + h2 = holes_array(2,1) + h3 = holes_array(1,2) + p1 = particles_array(1,1) + p2 = particles_array(2,1) + p3 = particles_array(1,2) + else if(degree_array(2) == 2 .and. degree_array(1) == 1)then ! double beta + single alpha + h1 = holes_array(1,2) + h2 = holes_array(2,2) + h3 = holes_array(1,1) + p1 = particles_array(1,2) + p2 = particles_array(2,2) + p3 = particles_array(1,1) + else + print*,'PB !!' + stop + endif + hthree = three_body_ints_bi_ort(p3,p2,p1,h3,h2,h1) - three_body_ints_bi_ort(p3,p2,p1,h3,h1,h2) + endif + hthree *= phase + htot = hthree + end + diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index 1d1b26cc..e96e738e 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -90,3 +90,96 @@ subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) end +subroutine i_H_tc_psi_phi(key,keys,coef_l,coef_r,Nint,Ndet,Ndet_max,Nstate,chi_H_i_array,i_H_phi_array) + use bitmasks + implicit none + BEGIN_DOC +! Computes $\langle i|H|Phi \rangle = \sum_J c^R_J \langle i | H | J \rangle$. +! +! AND $\langle Chi|H| i \rangle = \sum_J c^L_J \langle J | H | i \rangle$. +! +! CONVENTION: i_H_phi_array(0) = total matrix element, +! +! i_H_phi_array(1) = one-electron matrix element, +! +! i_H_phi_array(2) = two-electron matrix element, +! +! i_H_phi_array(3) = three-electron matrix element, +! +! Uses filter_connected_i_H_psi0 to get all the $|J \rangle$ to which $|i \rangle$ +! is connected. +! +! The i_H_psi_minilist is much faster but requires to build the +! minilists. + END_DOC + integer, intent(in) :: Nint, Ndet,Ndet_max,Nstate + integer(bit_kind), intent(in) :: keys(Nint,2,Ndet) + integer(bit_kind), intent(in) :: key(Nint,2) + double precision, intent(in) :: coef_l(Ndet_max,Nstate),coef_r(Ndet_max,Nstate) + double precision, intent(out) :: chi_H_i_array(0:3,Nstate),i_H_phi_array(0:3,Nstate) + + integer :: i, ii,j + double precision :: phase + integer :: exc(0:2,2,2) + double precision :: hmono, htwoe, hthree, htot + integer, allocatable :: idx(:) + + ASSERT (Nint > 0) + ASSERT (N_int == Nint) + ASSERT (Nstate > 0) + ASSERT (Ndet > 0) + ASSERT (Ndet_max >= Ndet) + allocate(idx(0:Ndet)) + + chi_H_i_array = 0.d0 + i_H_phi_array = 0.d0 + + call filter_connected_i_H_psi0(keys,key,Nint,Ndet,idx) + if (Nstate == 1) then + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + chi_H_i_array(0,1) = chi_H_i_array(0,1) + coef_l(i,1)*htot + chi_H_i_array(1,1) = chi_H_i_array(1,1) + coef_l(i,1)*hmono + chi_H_i_array(2,1) = chi_H_i_array(2,1) + coef_l(i,1)*htwoe + chi_H_i_array(3,1) = chi_H_i_array(3,1) + coef_l(i,1)*hthree + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + i_H_phi_array(0,1) = i_H_phi_array(0,1) + coef_r(i,1)*htot + i_H_phi_array(1,1) = i_H_phi_array(1,1) + coef_r(i,1)*hmono + i_H_phi_array(2,1) = i_H_phi_array(2,1) + coef_r(i,1)*htwoe + i_H_phi_array(3,1) = i_H_phi_array(3,1) + coef_r(i,1)*hthree + enddo + + else + + do ii=1,idx(0) + i = idx(ii) + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(keys(1,1,i), key, Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + chi_H_i_array(0,j) = chi_H_i_array(0,j) + coef_l(i,j)*htot + chi_H_i_array(1,j) = chi_H_i_array(1,j) + coef_l(i,j)*hmono + chi_H_i_array(2,j) = chi_H_i_array(2,j) + coef_l(i,j)*htwoe + chi_H_i_array(3,j) = chi_H_i_array(3,j) + coef_l(i,j)*hthree + enddo + ! computes + !DIR$ FORCEINLINE + call htilde_mu_mat_opt_bi_ortho(key,keys(1,1,i), Nint, hmono, htwoe, hthree, htot) + do j = 1, Nstate + i_H_phi_array(0,j) = i_H_phi_array(0,j) + coef_r(i,j)*htot + i_H_phi_array(1,j) = i_H_phi_array(1,j) + coef_r(i,j)*hmono + i_H_phi_array(2,j) = i_H_phi_array(2,j) + coef_r(i,j)*htwoe + i_H_phi_array(3,j) = i_H_phi_array(3,j) + coef_r(i,j)*hthree + enddo + enddo + + endif + +end + diff --git a/src/tc_bi_ortho/normal_ordered_old.irp.f b/src/tc_bi_ortho/normal_ordered_old.irp.f index e5e63355..f40805a9 100644 --- a/src/tc_bi_ortho/normal_ordered_old.irp.f +++ b/src/tc_bi_ortho/normal_ordered_old.irp.f @@ -120,6 +120,13 @@ END_PROVIDER subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) use bitmasks ! you need to include the bitmasks_module.f90 features + BEGIN_DOC +! give the contribution for a double excitation of opposite spin BUT averaged over spin +! +! it is the average of and +! +! because the orbitals h1,h2,p1,p2 are spatial orbitals and therefore can be of different spins + END_DOC implicit none integer, intent(in) :: Nint, h1, h2, p1, p2 @@ -158,7 +165,8 @@ subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) call give_integrals_3_body_bi_ort(p2, i, p1, i, h2, h1, integral) int_exc_12 = -1.d0 * integral - hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) + hthree += 1.d0 * int_direct - 0.5d0 * (int_exc_13 + int_exc_12) ! spin average +! hthree += 1.d0 * int_direct - 1.0d0 * (int_exc_13 + int_exc_12) enddo return diff --git a/src/tc_bi_ortho/print_tc_dump.irp.f b/src/tc_bi_ortho/print_tc_dump.irp.f index 868de444..37dfe051 100644 --- a/src/tc_bi_ortho/print_tc_dump.irp.f +++ b/src/tc_bi_ortho/print_tc_dump.irp.f @@ -62,7 +62,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_two_e_chemist(j,i,l,k), i, j, k, l enddo enddo enddo @@ -71,7 +71,7 @@ subroutine KMat_tilde_dump() do j = 1, mo_num do i = 1, mo_num ! TCHint convention - write(33, '(E15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 + write(33, '(ES15.7, 4X, 4(I4, 2X))') mo_bi_ortho_tc_one_e(i,j), i, j, 0, 0 enddo enddo @@ -128,7 +128,7 @@ subroutine ERI_dump() do k = 1, mo_num do j = 1, mo_num do i = 1, mo_num - write(33, '(4(I4, 2X), 4X, E15.7)') i, j, k, l, a1(i,j,k,l) + write(33, '(4(I4, 2X), 4X, ES15.7)') i, j, k, l, a1(i,j,k,l) enddo enddo enddo @@ -167,8 +167,8 @@ subroutine LMat_tilde_dump() !write(33, '(6(I4, 2X), 4X, E15.7)') i, j, k, l, m, n, integral ! TCHint convention if(dabs(integral).gt.1d-10) then - write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n - !write(33, '(E15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k + write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, i, j, k, l, m, n + !write(33, '(ES15.7, 4X, 6(I4, 2X))') -integral/3.d0, l, m, n, i, j, k endif enddo enddo diff --git a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 35abbbc4..cb33d343 100644 --- a/src/tc_bi_ortho/slater_tc_3e_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -184,7 +184,7 @@ subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) ii = occ(i,s1) do j = i+1, Ne(s1) jj = occ(j,s1) -! ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) +! !ref = sym_3_e_int_from_6_idx_tensor(jj,ii,p1,jj,ii,h1) hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR enddo enddo diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index f12b83e3..c3a5ba6b 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -19,6 +19,9 @@ subroutine provide_all_three_ints_bi_ortho() if(three_e_4_idx_term) then PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif + if(pure_three_body_h_tc)then + provide three_body_ints_bi_ort + endif if(.not. double_normal_ord .and. three_e_5_idx_term) then PROVIDE three_e_5_idx_direct_bi_ort @@ -87,14 +90,26 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) - if(degree.gt.2) return - - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + if(.not.pure_three_body_h_tc)then + if(degree.gt.2) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + else + if(degree.gt.3) return + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + else + call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif endif if(degree==0) then diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f027c38f..a9e22e03 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -225,6 +225,8 @@ end external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + external H_tc_s2_dagger_u_0_with_pure_three_omp + external H_tc_s2_u_0_with_pure_three_omp allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) @@ -250,7 +252,11 @@ end converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_with_pure_three_omp) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -275,7 +281,11 @@ end converged = .False. i_it = 0 do while (.not. converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + if(.not.pure_three_body_h_tc)then + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + else + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_with_pure_three_omp) + endif i_it += 1 if(i_it .gt. 5) exit enddo @@ -328,6 +338,11 @@ end TOUCH psi_r_coef_bi_ortho call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) deallocate(buffer) +! print*,'After diag' +! do i = 1, N_det! old version +! print*,'i',i,psi_l_coef_bi_ortho(i,1),psi_r_coef_bi_ortho(i,1) +! call debug_det(psi_det(1,1,i),N_int) +! enddo END_PROVIDER diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index b7e5ae81..a72d356a 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -23,7 +23,7 @@ dm_tmp(1:mo_num,1:mo_num) = -tc_transition_matrix_mo(1:mo_num,1:mo_num,1,1) - print *, ' dm_tmp' + print *, ' Transition density matrix ' do i = 1, mo_num fock_diag(i) = fock_matrix_tc_mo_tot(i,i) write(*, '(100(F16.10,X))') -dm_tmp(:,i) @@ -32,12 +32,17 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & - , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) -! call non_hrmt_bieig( mo_num, dm_tmp& -! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& -! , mo_num, natorb_tc_eigval ) +! if(n_core_orb.ne.0)then +! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! else +! call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & +! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) +! endif + call non_hrmt_bieig(mo_num, dm_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo & + , mo_num, natorb_tc_eigval ) accu = 0.d0 do i = 1, mo_num print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f index 5bb0e2c0..a13dc9a2 100644 --- a/src/tc_bi_ortho/tc_prop.irp.f +++ b/src/tc_bi_ortho/tc_prop.irp.f @@ -29,7 +29,7 @@ tc_transition_matrix_mo_alpha(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo do p = 1, n_occ_ab(2) ! browsing the beta electrons - m = occ(p,1) + m = occ(p,2) tc_transition_matrix_mo_beta(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) enddo else @@ -38,12 +38,14 @@ ! Single alpha h = exc(1,1,1) ! hole in psi_det(1,1,j) p = exc(1,2,1) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_alpha(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) else ! Single beta h = exc(1,1,2) ! hole in psi_det(1,1,j) p = exc(1,2,2) ! particle in psi_det(1,1,j) - tc_transition_matrix_mo_beta(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) + tc_transition_matrix_mo_beta(p,h,istate,jstate)+= & + phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate) endif endif enddo diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index e805eecb..0cf27396 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -33,7 +33,7 @@ subroutine test integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2) integer :: exc(0:2,2,2) integer(bit_kind), allocatable :: det_i(:,:) - double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal + double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal,hthree_tmp integer, allocatable :: occ(:,:) allocate( occ(N_int*bit_kind_size,2) ) call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int) @@ -45,15 +45,44 @@ subroutine test do p1 = elec_alpha_num+1, mo_num do h2 = 1, elec_beta_num do p2 = elec_beta_num+1, mo_num + hthree = 0.d0 + det_i = ref_bitmask + s1 = 1 + s2 = 2 call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) - hthree *= phase -! !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) - call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + det_i = ref_bitmask + s1 = 2 + s2 = 1 + call do_single_excitation(det_i,h1,p1,s1,i_ok) + if(i_ok.ne.1)cycle + call do_single_excitation(det_i,h2,p2,s2,i_ok) + if(i_ok.ne.1)cycle + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree_tmp,htilde_ij) + call get_excitation_degree(ref_bitmask,det_i,degree,N_int) + call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) + hthree_tmp *= phase + hthree += 0.5d0 * hthree_tmp + + +! normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1) + call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, normal) + if(dabs(hthree).lt.1.d-10)cycle + if(dabs(hthree-normal).gt.1.d-10)then +! print*,pp2,pp1,hh2,hh1 + print*,p2,p1,h2,h1 + print*,hthree,normal,dabs(hthree-normal) + stop + endif +! call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal) ! normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1) accu += dabs(hthree-normal) enddo @@ -86,8 +115,8 @@ do h1 = 1, elec_alpha_num integer :: hh1, pp1, hh2, pp2, ss1, ss2 call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) hthree *= phase -! normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) - normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) + normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) +! normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1) if(dabs(hthree).lt.1.d-10)cycle if(dabs(hthree-normal).gt.1.d-10)then print*,pp2,pp1,hh2,hh1 diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index b398507a..7c70b119 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -14,12 +14,14 @@ program test_tc read_wf = .True. touch read_wf - call routine_test_s2 - call routine_test_s2_davidson + call provide_all_three_ints_bi_ortho() + call routine_h_triple_left + call routine_h_triple_right +! call routine_test_s2_davidson end -subroutine routine_test_s2 +subroutine routine_h_triple_right implicit none logical :: do_right integer :: sze ,i, N_st, j @@ -29,67 +31,65 @@ subroutine routine_test_s2 sze = N_det N_st = 1 allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) - print*,'Checking first the Left ' - do_right = .False. - do i = 1, sze - u_0(i,1) = psi_l_coef_bi_ortho(i,1) - enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) - accu_e = 0.d0 - accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 - do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) - accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) - accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) - enddo - print*,'accu_e = ',accu_e - print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - - print*,'Checking then the right ' - do_right = .True. + print*,'Checking first the Right ' do i = 1, sze u_0(i,1) = psi_r_coef_bi_ortho(i,1) enddo - call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) - s_0_ref = 0.d0 - do i = 1, sze - do j = 1, sze - call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) - s_0_ref(i,1) += u_0(j,1) * sij - enddo - enddo - call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 accu_e = 0.d0 accu_s = 0.d0 - accu_e_0 = 0.d0 - accu_s_0 = 0.d0 do i = 1, sze - accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) - accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) enddo print*,'accu_e = ',accu_e print*,'accu_s = ',accu_s - print*,'accu_e_0 = ',accu_e_0 - print*,'accu_s_0 = ',accu_s_0 - end +subroutine routine_h_triple_left + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking the Left ' + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + double precision :: wall0,wall1 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three_omp(v_0_ref,s_0_ref, u_0,N_st,sze) + call wall_time(wall1) + print*,'time for omp',wall1 - wall0 + call wall_time(wall0) + call H_tc_s2_dagger_u_0_with_pure_three(v_0_new, s_0_new, u_0, N_st, sze) + call wall_time(wall1) + print*,'time serial ',wall1 - wall0 + accu_e = 0.d0 + accu_s = 0.d0 + do i = 1, sze + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + +end + + subroutine routine_test_s2_davidson implicit none double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index 182c03d7..f1a7cc0a 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -152,9 +152,7 @@ subroutine routine_tot() ! do i = 1, elec_num_tab(s1) ! do a = elec_num_tab(s1)+1, mo_num ! virtual do i = 1, elec_beta_num - do a = elec_beta_num+1, elec_alpha_num! virtual -! do i = elec_beta_num+1, elec_alpha_num -! do a = elec_alpha_num+1, mo_num! virtual + do a = elec_beta_num+1, mo_num! virtual print*,i,a det_i = ref_bitmask @@ -167,7 +165,7 @@ subroutine routine_tot() call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij - if(dabs(htilde_ij).lt.1.d-10)cycle +! if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' call debug_det(det_i, N_int) @@ -184,9 +182,12 @@ subroutine routine_tot() ! endif err_ai = dabs(dabs(ref) - dabs(new)) if(err_ai .gt. 1d-7) then + print*,'---------' print*,'s1 = ',s1 print*, ' warning on', i, a print*, ref,new,err_ai + print*,hmono, htwoe, hthree + print*,'---------' endif print*, ref,new,err_ai err_tot += err_ai diff --git a/src/tc_bi_ortho/test_tc_two_rdm.irp.f b/src/tc_bi_ortho/test_tc_two_rdm.irp.f new file mode 100644 index 00000000..68b96f37 --- /dev/null +++ b/src/tc_bi_ortho/test_tc_two_rdm.irp.f @@ -0,0 +1,67 @@ +program test_tc_rdm + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + PROVIDE tc_grid1_a tc_grid1_r + my_n_pt_r_grid = tc_grid1_r + my_n_pt_a_grid = tc_grid1_a + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + read_wf = .True. + touch read_wf + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call test() + +end + +subroutine test + implicit none + integer :: h1,p1,h2,p2,i,j,istate,s1,s2 + double precision :: rdm, integral, accu,ref, accu_new ,rdm_new + double precision :: hmono, htwoe, hthree, htot + accu = 0.d0 + accu_new = 0.d0 + do h1 = 1, mo_num + do p1 = 1, mo_num + do h2 = 1, mo_num + do p2 = 1, mo_num + integral = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + rdm = tc_two_rdm(p2,p1,h2,h1) + accu += integral * rdm + rdm_new = 0.d0 + do s2 = 1, 2 + do s1 = 1, 2 + rdm_new += tc_two_rdm_s1s2(p2,p1,h2,h1,s1,s2) + enddo + enddo + accu_new += integral * rdm_new + enddo + enddo + enddo + enddo + accu *= 0.5d0 + accu_new *= 0.5d0 + print*,'accu = ',accu + print*,'accu_new = ',accu_new + ref = 0.d0 + do i = 1, N_det + do j = 1, N_det + call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + do istate = 1,N_states + ref += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) * htwoe + enddo + enddo + enddo + print*,' ref = ',ref + print*,'delta= ',ref-accu + +end diff --git a/src/tc_bi_ortho/two_rdm_naive.irp.f b/src/tc_bi_ortho/two_rdm_naive.irp.f new file mode 100644 index 00000000..90163de5 --- /dev/null +++ b/src/tc_bi_ortho/two_rdm_naive.irp.f @@ -0,0 +1,166 @@ + BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_chemist_s1s2, (mo_num, mo_num, mo_num, mo_num, 2,2)] + implicit none + BEGIN_DOC + ! tc_two_rdm_chemist(p,s,q,r) = = CHEMIST NOTATION + END_DOC + integer :: i,j,istate,m,mm,nn + integer :: exc(0:2,2,2) + double precision :: phase + double precision :: contrib + integer :: h1,p1,s1,h2,p2,s2,degree + integer, allocatable :: occ(:,:) + integer :: n_occ_ab(2),other_spin(2) + other_spin(1) = 2 + other_spin(2) = 1 + allocate(occ(N_int*bit_kind_size,2)) + tc_two_rdm_chemist = 0.d0 + tc_two_rdm_chemist_s1s2 = 0.d0 + + do i = 1, N_det ! psi_left + do j = 1, N_det ! psi_right + call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int) + if(degree.gt.2)cycle + if(degree.gt.0)then + ! get excitation operators: from psi_det(j) --> psi_det(i) + ! T_{j-->i} = a^p1_s1 a_h1_s1 + call get_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * phase * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * phase * state_average_weight(istate) + enddo + if(degree == 2)then + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + else if(degree==1)then + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + + ! run over the electrons of opposite spin than the excitation + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + ! run over the electrons of same spin than the excitation + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + endif + else if(degree == 0)then +! cycle + contrib = psi_l_coef_bi_ortho(i,1) * psi_r_coef_bi_ortho(j,1) * state_average_weight(1) + do istate = 2, N_states + contrib += psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,istate) * state_average_weight(istate) + enddo + ! occupation of the determinant psi_det(j) + call bitstring_to_list_ab(psi_det(1,1,j), occ, n_occ_ab, N_int) + s1 = 1 ! alpha electrons + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of alpha-beta electrons + s2 = other_spin(s1) + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + ! run over the couple of alpha-alpha electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + enddo + s1 = 2 + do nn = 1, n_occ_ab(s1) + h1 = occ(nn,s1) + p1 = occ(nn,s1) + ! run over the couple of beta-beta electrons + s2 = s1 + do mm = 1, n_occ_ab(s2) + m = occ(mm,s2) + h2 = m + p2 = m + if(h2.le.h1)cycle + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist,mo_num,contrib) + call update_tc_rdm(h1,p1,h2,p2,s1,s2,tc_two_rdm_chemist_s1s2(1,1,1,1,s1,s2) ,mo_num,contrib) + enddo + enddo + endif + enddo + enddo + +END_PROVIDER + +subroutine update_tc_rdm(h1,p1,h2,p2,s1,s2,array,sze,contrib) + implicit none + integer, intent(in) :: h1,p1,h2,p2,s1,s2,sze + double precision, intent(in) :: contrib + double precision, intent(inout) :: array(sze, sze, sze, sze) + integer :: istate + if(s1.ne.s2)then + array(p1,h1,p2,h2) += contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + else ! same spin double excitation + array(p1,h1,p2,h2) += contrib + ! exchange + ! exchanging the particles + array(p2,h1,p1,h2) -= contrib + ! exchanging the + array(p1,h2,p2,h1) -= contrib + ! permutation for particle symmetry + array(p2,h2,p1,h1) += contrib + endif + +end + + + BEGIN_PROVIDER [ double precision, tc_two_rdm, (mo_num, mo_num, mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, tc_two_rdm_s1s2, (mo_num, mo_num, mo_num, mo_num,2,2)] + implicit none + BEGIN_DOC + ! tc_two_rdm(p,q,s,r) = = PHYSICIST NOTATION + END_DOC + integer :: p,q,r,s,s1,s2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm(p,q,s,r) = tc_two_rdm_chemist(p,s,q,r) + enddo + enddo + enddo + enddo + do s2 = 1, 2 + do s1 = 1, 2 + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + do p = 1, mo_num + tc_two_rdm_s1s2(p,q,s,r,s1,s2) = tc_two_rdm_chemist_s1s2(p,s,q,r,s1,s2) + enddo + enddo + enddo + enddo + enddo + enddo + +END_PROVIDER diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index fee492b4..fe396381 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -220,6 +220,12 @@ doc: Threshold to determine if diagonal elements of the bi-orthogonal condition interface: ezfio,provider,ocaml default: 1.e-6 +[thresh_lr_angle] +type: double precision +doc: Maximum value of the angle between the couple of left and right orbital for the rotations +interface: ezfio,provider,ocaml +default: 20.0 + [thresh_biorthog_nondiag] type: Threshold doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0 diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f index b86009ee..98c7b230 100644 --- a/src/tc_scf/molden_lr_mos.irp.f +++ b/src/tc_scf/molden_lr_mos.irp.f @@ -72,7 +72,7 @@ subroutine molden_lr write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -170,7 +170,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo write (i_unit_output,*) 'Sym= 1' @@ -178,7 +178,7 @@ subroutine molden_lr write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -235,7 +235,7 @@ subroutine molden_l() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -333,7 +333,7 @@ subroutine molden_l() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_l_coef(iorder(j),i) enddo enddo close(i_unit_output) @@ -390,7 +390,7 @@ subroutine molden_r() write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -488,7 +488,7 @@ subroutine molden_r() write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_r_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 755c35b9..588382b5 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -140,7 +140,11 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! compute the overlap between the left and rescaled right call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + if(n_core_orb.ne.0)then + call give_degen_full_listcore(fock_diag, mo_num, list_core, n_core_orb, thr_deg, list_degen, n_degen_list) + else + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + endif print *, ' fock_matrix_mo' do i = 1, mo_num print *, i, fock_diag(i), angle_left_right(i) @@ -152,6 +156,8 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) ! n_degen = ilast - ifirst +1 n_degen = list_degen(i,0) + if(n_degen .ge. 1000)n_degen = 1 ! convention for core orbitals + if(n_degen .eq. 1) cycle allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) @@ -279,7 +285,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) - good_angles = max_angle.lt.45.d0 + good_angles = max_angle.lt.thresh_lr_angle print *, ' max_angle = ', max_angle deallocate(new_angles) @@ -397,11 +403,11 @@ subroutine print_energy_and_mos(good_angles) print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right - if(max_angle_left_right .lt. 45.d0) then + if(max_angle_left_right .lt. thresh_lr_angle) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' good_angles = .true. - else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then - print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. thresh_lr_angle .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between thresh_lr_angle and 75 degrees, this is not the best for TC-CI calculations ...' good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' diff --git a/src/tools/molden.irp.f b/src/tools/molden.irp.f index 830a141e..e5902a6f 100644 --- a/src/tools/molden.irp.f +++ b/src/tools/molden.irp.f @@ -44,7 +44,7 @@ program molden write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' do k = 1, ao_prim_num(i_ao) i_prim +=1 - write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) enddo l = i_ao do while ( ao_l(l) == ao_l(i_ao) ) @@ -142,7 +142,7 @@ program molden write (i_unit_output,*) 'Spin= Alpha' write (i_unit_output,*) 'Occup=', mo_occ(i) do j=1,ao_num - write(i_unit_output, '(I6,2X,E20.10)') j, mo_coef(iorder(j),i) + write(i_unit_output, '(I6,2X,ES20.10)') j, mo_coef(iorder(j),i) enddo enddo close(i_unit_output) diff --git a/src/tools/print_ci_vectors.irp.f b/src/tools/print_ci_vectors.irp.f index 97dfdc0b..d5f86213 100644 --- a/src/tools/print_ci_vectors.irp.f +++ b/src/tools/print_ci_vectors.irp.f @@ -28,7 +28,7 @@ subroutine routine do i = 1, N_det print *, 'Determinant ', i call debug_det(psi_det(1,1,i),N_int) - print '(4E20.12,X)', (psi_coef(i,k), k=1,N_states) + print '(4ES20.12,X)', (psi_coef(i,k), k=1,N_states) print *, '' print *, '' enddo diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f index cd417a9d..ea636212 100644 --- a/src/two_body_rdm/state_av_act_2rdm.irp.f +++ b/src/two_body_rdm/state_av_act_2rdm.irp.f @@ -17,12 +17,12 @@ state_weights = state_average_weight integer :: ispin ! condition for alpha/beta spin - print*,'' - print*,'' - print*,'' - print*,'providint state_av_act_2_rdm_ab_mo ' +! print*,'' +! print*,'' +! print*,'' +! print*,'Providing state_av_act_2_rdm_ab_mo ' ispin = 3 - print*,'ispin = ',ispin +! print*,'ispin = ',ispin state_av_act_2_rdm_ab_mo = 0.d0 call wall_time(wall_1) double precision :: wall_1, wall_2 diff --git a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f index 2e44665d..5fb9e475 100644 --- a/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f +++ b/src/two_body_rdm/state_av_full_orb_2_rdm.irp.f @@ -4,7 +4,7 @@ state_av_full_occ_2_rdm_ab_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons +! state_av_full_occ_2_rdm_ab_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/beta + beta/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -12,11 +12,19 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * N_{\beta} * 2 ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" -! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core state_av_full_occ_2_rdm_ab_mo = 0.d0 + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_ab_mo, state_av_full_occ_2_rdm_ab_mo) + + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -25,15 +33,17 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - ! alph beta alph beta - state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & + ! alph beta alph beta + state_av_full_occ_2_rdm_ab_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_ab_mo(l,k,j,i) enddo enddo enddo enddo - !! BETA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! BETA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -45,9 +55,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA ACTIVE - BETA inactive - !! + !! ALPHA ACTIVE - BETA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -59,9 +71,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - BETA INACTIVE - !! + !! ALPHA INACTIVE - BETA INACTIVE + !! + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -70,13 +84,15 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! BETA ACTIVE - ALPHA CORE - !! + !! BETA ACTIVE - ALPHA CORE + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -88,9 +104,11 @@ enddo enddo enddo - + !$OMP END DO + !! ALPHA ACTIVE - BETA CORE - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -102,9 +120,11 @@ enddo enddo enddo + !$OMP END DO - !! ALPHA CORE - BETA CORE - !! + !! ALPHA CORE - BETA CORE + !! + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -113,9 +133,11 @@ state_av_full_occ_2_rdm_ab_mo(korb,jorb,korb,jorb) = 2.D0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_aa_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] @@ -123,7 +145,7 @@ state_av_full_occ_2_rdm_aa_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons +! state_av_full_occ_2_rdm_aa_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of alpha/alpha electrons ! ! = \sum_{istate} w(istate) * ! @@ -131,13 +153,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\alpha} * (N_{\alpha} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_aa_mo, state_av_full_occ_2_rdm_aa_mo) !! PURE ACTIVE PART ALPHA-ALPHA - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -152,74 +181,84 @@ enddo enddo enddo - !! ALPHA ACTIVE - ALPHA inactive - !! + !$OMP END DO + !! ALPHA ACTIVE - ALPHA inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! ALPHA INACTIVE - ALPHA INACTIVE + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!! -!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!! CAN BE USED +!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_aa_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_aa_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE - + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_aa_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - END_PROVIDER + !$OMP END PARALLEL + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_bb_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_bb_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * ! @@ -227,13 +266,20 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{\beta} * (N_{\beta} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC + PROVIDE n_core_orb list_core + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_bb_mo, state_av_full_occ_2_rdm_bb_mo) !! PURE ACTIVE PART beta-beta - !! + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -242,80 +288,90 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & + state_av_full_occ_2_rdm_bb_mo(lorb,korb,jorb,iorb) = & state_av_act_2_rdm_bb_mo(l,k,j,i) enddo enddo enddo enddo - !! beta ACTIVE - beta inactive - !! + !$OMP END DO + !! beta ACTIVE - beta inactive + !! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo + !$OMP END DO - !! beta INACTIVE - beta INACTIVE + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_bb_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_bb_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE - + !$OMP END DO + !! beta CORE - beta CORE + + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_bb_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER BEGIN_PROVIDER [double precision, state_av_full_occ_2_rdm_spin_trace_mo, (n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb,n_core_inact_act_orb)] implicit none state_av_full_occ_2_rdm_spin_trace_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb BEGIN_DOC -! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons +! state_av_full_occ_2_rdm_bb_mo(i,j,k,l) = STATE AVERAGE physicist notation for 2RDM of beta/beta electrons ! ! = \sum_{istate} w(istate) * \sum_{sigma,sigma'} ! @@ -324,14 +380,22 @@ ! ! THE NORMALIZATION (i.e. sum of diagonal elements) IS SET TO N_{elec} * (N_{elec} - 1) ! -! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" +! !!!!! WARNING !!!!! ALL SLATER DETERMINANTS IN PSI_DET MUST BELONG TO AN ACTIVE SPACE DEFINED BY "list_act" ! -! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero - END_DOC +! !!!!! WARNING !!!!! IF "no_core_density" then all elements involving at least one CORE MO is set to zero + END_DOC - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + PROVIDE n_core_orb list_core + + !$OMP PARALLEL PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_act_orb, n_inact_orb, n_core_orb, & + !$OMP list_core, list_act, list_inact, no_core_density, & + !$OMP one_e_dm_mo_alpha_average, one_e_dm_mo_beta_average, & + !$OMP state_av_act_2_rdm_spin_trace_mo, state_av_full_occ_2_rdm_spin_trace_mo) + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !! PURE ACTIVE PART SPIN-TRACE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -340,128 +404,146 @@ korb = list_act(k) do l = 1, n_act_orb lorb = list_act(l) - state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & + state_av_full_occ_2_rdm_spin_trace_mo(lorb,korb,jorb,iorb) += & state_av_act_2_rdm_spin_trace_mo(l,k,j,i) enddo enddo enddo enddo + !$OMP END DO - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! BETA-BETA !!!!! - !! beta ACTIVE - beta inactive + !! beta ACTIVE - beta inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta INACTIVE - beta INACTIVE + !$OMP END DO + !! beta INACTIVE - beta INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! beta ACTIVE - beta CORE + !! beta ACTIVE - beta CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) enddo enddo enddo - !! beta CORE - beta CORE + !$OMP END DO + !! beta CORE - beta CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-ALPHA !!!!! - !! ALPHA ACTIVE - ALPHA inactive + !! ALPHA ACTIVE - ALPHA inactive + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_inact_orb korb = list_inact(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - ALPHA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb korb = list_inact(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO if (.not.no_core_density)then - !! ALPHA ACTIVE - ALPHA CORE + !! ALPHA ACTIVE - ALPHA CORE + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - ! 1 2 1 2 : DIRECT TERM + ! 1 2 1 2 : DIRECT TERM state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! 1 2 1 2 : EXCHANGE TERM + ! 1 2 1 2 : EXCHANGE TERM state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,korb,iorb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,iorb,korb) += -1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - ALPHA CORE + !$OMP END DO + !! ALPHA CORE - ALPHA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb korb = list_core(k) - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 - state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,jorb) += 1.0d0 + state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,jorb,korb) -= 1.0d0 enddo enddo + !$OMP END DO endif - !!!!!!!!!!!!!!!! - !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! + !!!!!!!!!!!!!!!! !!!!! ALPHA-BETA + BETA-ALPHA !!!!! + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb @@ -474,14 +556,16 @@ ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_beta_average(jorb,iorb) ! BETA INACTIVE - ALPHA ACTIVE - ! beta alph beta alpha + ! beta alph beta alpha state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! alph beta alph beta + ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0d0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA INACTIVE - BETA INACTIVE + !$OMP END DO + !! ALPHA INACTIVE - BETA INACTIVE + !$OMP DO do j = 1, n_inact_orb jorb = list_inact(j) do k = 1, n_inact_orb @@ -491,31 +575,35 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0d0 enddo enddo + !$OMP END DO !!!!!!!!!!!! -!!!!!!!!!!!! if "no_core_density" then you don't put the core part -!!!!!!!!!!!! CAN BE USED +!!!!!!!!!!!! if "no_core_density" then you don't put the core part +!!!!!!!!!!!! CAN BE USED if (.not.no_core_density)then + !$OMP DO do i = 1, n_act_orb iorb = list_act(i) do j = 1, n_act_orb jorb = list_act(j) do k = 1, n_core_orb korb = list_core(k) - !! BETA ACTIVE - ALPHA CORE + !! BETA ACTIVE - ALPHA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_beta_average(jorb,iorb) - !! ALPHA ACTIVE - BETA CORE + !! ALPHA ACTIVE - BETA CORE ! alph beta alph beta state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,iorb,korb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) - ! beta alph beta alph + ! beta alph beta alph state_av_full_occ_2_rdm_spin_trace_mo(korb,jorb,korb,iorb) += 1.0D0 * one_e_dm_mo_alpha_average(jorb,iorb) enddo enddo enddo - !! ALPHA CORE - BETA CORE + !$OMP END DO + !! ALPHA CORE - BETA CORE + !$OMP DO do j = 1, n_core_orb jorb = list_core(j) do k = 1, n_core_orb @@ -525,7 +613,9 @@ state_av_full_occ_2_rdm_spin_trace_mo(jorb,korb,jorb,korb) += 1.0D0 enddo enddo + !$OMP END DO endif + !$OMP END PARALLEL - END_PROVIDER + END_PROVIDER diff --git a/src/two_body_rdm/two_e_dm_mo.irp.f b/src/two_body_rdm/two_e_dm_mo.irp.f index 99be1f54..04c44f61 100644 --- a/src/two_body_rdm/two_e_dm_mo.irp.f +++ b/src/two_body_rdm/two_e_dm_mo.irp.f @@ -16,6 +16,9 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] two_e_dm_mo = 0.d0 integer :: i,j,k,l,iorb,jorb,korb,lorb,istate + !$OMP PARALLEL DO PRIVATE(i,j,k,l,iorb,jorb,korb,lorb) & + !$OMP DEFAULT(NONE) SHARED(n_core_inact_act_orb, list_core_inact_act, & + !$OMP two_e_dm_mo, state_av_full_occ_2_rdm_spin_trace_mo) do l=1,n_core_inact_act_orb lorb = list_core_inact_act(l) do k=1,n_core_inact_act_orb @@ -29,7 +32,7 @@ BEGIN_PROVIDER [double precision, two_e_dm_mo, (mo_num,mo_num,mo_num,mo_num)] enddo enddo enddo - two_e_dm_mo(:,:,:,:) = two_e_dm_mo(:,:,:,:) + !$OMP END PARALLEL DO END_PROVIDER diff --git a/src/utils/block_diag_degen_core.irp.f b/src/utils/block_diag_degen_core.irp.f new file mode 100644 index 00000000..5d46bd87 --- /dev/null +++ b/src/utils/block_diag_degen_core.irp.f @@ -0,0 +1,244 @@ + +subroutine diag_mat_per_fock_degen_core(fock_diag, mat_ref, listcore,ncore, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) + + + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! the elements of listcore are untouched + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC + + implicit none + integer, intent(in) :: n,ncore, listcore(ncore) + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_listcore(fock_diag, n, listcore, ncore, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i + enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + + if(n_degen.ge.1000)then + print*,'core orbital ' + else + print *, ' diagonalizing for n_degen = ', n_degen + endif + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + if(n_degen.ge.1000)then + n_degen = 1 + endif + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) + enddo + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif + + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i + enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + +end + +! --- + +subroutine give_degen_full_listcore(A, n, listcore, ncore, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + ! + ! if list_degen(i,0) >= 1000 it means that it is core orbitals + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n,ncore, listcore(ncore) + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck,k + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + ! you first exclude the "core" orbitals + do i = 1, ncore + j=listcore(i) + is_ok(j) = .False. + enddo + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount + enddo + ! you set all the core orbitals as separate entities + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + if(icheck.ne.(n-ncore))then + print *, ' pb ! :: icheck.ne.n-ncore' + print *, icheck, n-ncore + stop + endif + k=1000 + do i = 1, ncore + n_degen_list+= 1 + j=listcore(i) + list_degen(n_degen_list,1) = i + list_degen(n_degen_list,0) = k + k+=1 + enddo + + + +end + +! --- + diff --git a/src/utils/c_functions.f90 b/src/utils/c_functions.f90 index 65d4ad62..a9c8900b 100644 --- a/src/utils/c_functions.f90 +++ b/src/utils/c_functions.f90 @@ -57,6 +57,12 @@ module c_functions end subroutine sscanf_sd_c end interface + interface + integer(kind=c_int) function mkl_serv_intel_cpu_true() bind(C) + use iso_c_binding + end function + end interface + contains integer function atoi(a) @@ -131,4 +137,3 @@ subroutine usleep(us) call usleep_c(u) end subroutine usleep - diff --git a/src/utils/fast_mkl.c b/src/utils/fast_mkl.c new file mode 100644 index 00000000..aa1f82f1 --- /dev/null +++ b/src/utils/fast_mkl.c @@ -0,0 +1,5 @@ +int mkl_serv_intel_cpu_true() { + return 1; +} + + diff --git a/src/utils/format_w_error.irp.f b/src/utils/format_w_error.irp.f index 7f7458b6..c253456e 100644 --- a/src/utils/format_w_error.irp.f +++ b/src/utils/format_w_error.irp.f @@ -39,7 +39,7 @@ subroutine format_w_error(value,error,size_nb,max_nb_digits,format_value,str_err write(str_size,'(I3)') size_nb ! Error - write(str_exp,'(1pE20.0)') error + write(str_exp,'(ES20.0)') error str_error = trim(adjustl(str_exp)) ! Number of digit: Y (FX.Y) from the exponent diff --git a/src/utils/fortran_mmap.c b/src/utils/fortran_mmap.c index 52df2476..e8d85a2f 100644 --- a/src/utils/fortran_mmap.c +++ b/src/utils/fortran_mmap.c @@ -9,7 +9,6 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) { - int i; int fd; int result; void* map; @@ -22,11 +21,7 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) perror("Error opening mmap file for reading"); exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ, MAP_SHARED, fd, 0); } else { @@ -53,16 +48,12 @@ void* mmap_fortran(char* filename, size_t bytes, int* file_descr, int read_only) exit(EXIT_FAILURE); } - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_HUGETLB, fd, 0); - if (map == MAP_FAILED) { - /* try again without huge pages */ - map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - } + map = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); } if (map == MAP_FAILED) { close(fd); - printf("%s:\n", filename); + printf("%s: %lu\n", filename, bytes); perror("Error mmapping the file"); exit(EXIT_FAILURE); } diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f index 76a539a6..314ad4f6 100644 --- a/src/utils/linear_algebra.irp.f +++ b/src/utils/linear_algebra.irp.f @@ -1565,7 +1565,7 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh) ! Remove tiny elements do j=1,n do i=1,m - if ( dabs(A(i,j) * amax) < thresh ) then + if ( (dabs(A(i,j) * amax) < thresh).or.(dabs(A(i,j)) < 1.d-99) ) then A(i,j) = 0.d0 endif enddo @@ -1661,7 +1661,15 @@ subroutine restore_symmetry(m,n,A,LDA,thresh) ! Update i i = i + 1 enddo - copy(i:) = 0.d0 + + ! To nullify the remaining elements that are below the threshold + if (i == sze) then + if (-copy(i) <= thresh) then + copy(i) = 0d0 + endif + else + copy(i:) = 0.d0 + endif !$OMP PARALLEL if (sze>10000) & !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) & diff --git a/src/utils/map_functions.irp.f b/src/utils/map_functions.irp.f index cd3b28a8..97d0e8bf 100644 --- a/src/utils/map_functions.irp.f +++ b/src/utils/map_functions.irp.f @@ -11,6 +11,10 @@ subroutine map_save_to_disk(filename,map) integer*8 :: n_elements n_elements = int(map % n_elements,8) + if (n_elements <= 0) then + print *, 'Unable to write map to disk: n_elements = ', n_elements + stop -1 + endif if (map % consolidated) then diff --git a/src/utils/memory.irp.f b/src/utils/memory.irp.f index 115b2cbe..41ec0428 100644 --- a/src/utils/memory.irp.f +++ b/src/utils/memory.irp.f @@ -4,8 +4,10 @@ BEGIN_PROVIDER [ integer, qp_max_mem ] ! Maximum memory in Gb END_DOC character*(128) :: env + integer, external :: get_total_available_memory - qp_max_mem = 2000 + qp_max_mem = get_total_available_memory() + call write_int(6,qp_max_mem,'Total available memory (GB)') call getenv('QP_MAXMEM',env) if (trim(env) /= '') then call lock_io() @@ -97,16 +99,15 @@ subroutine check_mem(rss_in,routine) END_DOC double precision, intent(in) :: rss_in character*(*) :: routine - double precision :: rss - !$OMP CRITICAL - call resident_memory(rss) - rss += rss_in - if (int(rss)+1 > qp_max_mem) then + double precision :: mem + call total_memory(mem) + mem += rss_in + if (mem > qp_max_mem) then + call print_memory_usage() print *, 'Not enough memory: aborting in ', routine - print *, int(rss)+1, ' GB required' + print *, mem, ' GB required' stop -1 endif - !$OMP END CRITICAL end subroutine print_memory_usage() @@ -122,3 +123,35 @@ subroutine print_memory_usage() '.. >>>>> [ RES MEM : ', rss , & ' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..' end + +integer function get_total_available_memory() result(res) + implicit none + BEGIN_DOC +! Returns the total available memory on the current machine + END_DOC + + character(len=128) :: line + integer :: status + integer :: iunit + integer*8, parameter :: KB = 1024 + integer*8, parameter :: GiB = 1024**3 + integer, external :: getUnitAndOpen + + iunit = getUnitAndOpen('/proc/meminfo','r') + + res = 512 + do + read(iunit, '(A)', END=10) line + if (line(1:10) == "MemTotal: ") then + read(line(11:), *, ERR=20) res + res = int((res*KB) / GiB,4) + exit + 20 continue + end if + end do + 10 continue + close(iunit) + +end function get_total_available_memory + + diff --git a/src/utils/mmap.f90 b/src/utils/mmap.f90 index 49147283..41e60224 100644 --- a/src/utils/mmap.f90 +++ b/src/utils/mmap.f90 @@ -46,7 +46,13 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo + if (read_only) then map = c_mmap_fortran( trim(filename)//char(0), length, fd_, 1) else @@ -66,7 +72,12 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo fd_ = fd call c_munmap_fortran( length, fd_, map) end subroutine @@ -82,7 +93,12 @@ module mmap_module integer(c_size_t) :: length integer(c_int) :: fd_ - length = PRODUCT( shape(:) ) * bytes + integer :: i + + length = int(bytes,8) + do i=1,size(shape) + length = length * shape(i) + enddo fd_ = fd call c_msync_fortran( length, fd_, map) end subroutine diff --git a/src/utils_cc/mo_integrals_cc.irp.f b/src/utils_cc/mo_integrals_cc.irp.f index dafcf7af..b2b68d05 100644 --- a/src/utils_cc/mo_integrals_cc.irp.f +++ b/src/utils_cc/mo_integrals_cc.irp.f @@ -48,32 +48,31 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) integer :: i1,i2,i3,i4,idx1,idx2,idx3,idx4,k if (do_ao_cholesky) then - double precision, allocatable :: buffer(:,:,:) - !$OMP PARALLEL & - !$OMP SHARED(n1,n2,n3,n4,list1,list2,list3,list4,v,mo_num,cholesky_mo_transp,cholesky_ao_num) & - !$OMP PRIVATE(i1,i2,i3,i4,idx1,idx2,idx3,idx4,k,buffer)& - !$OMP DEFAULT(NONE) - allocate(buffer(mo_num,mo_num,mo_num)) - !$OMP DO + double precision, allocatable :: buffer(:,:,:,:) + double precision, allocatable :: v1(:,:,:), v2(:,:,:) + allocate(v1(cholesky_mo_num,n1,n3), v2(cholesky_mo_num,n2,n4)) + allocate(buffer(n1,n3,n2,n4)) + + call gen_v_space_chol(n1,n3,list1,list3,v1,cholesky_mo_num) + call gen_v_space_chol(n2,n4,list2,list4,v2,cholesky_mo_num) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + v1, cholesky_mo_num, & + v2, cholesky_mo_num, 0.d0, buffer, n1*n3) + + deallocate(v1,v2) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) do i4 = 1, n4 - idx4 = list4(i4) - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,idx4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) - do i2 = 1, n2 - idx2 = list2(i2) - do i3 = 1, n3 - idx3 = list3(i3) + do i3 = 1, n3 + do i2 = 1, n2 do i1 = 1, n1 - idx1 = list1(i1) - v(i1,i2,i3,i4) = buffer(idx1,idx3,idx2) + v(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) enddo enddo enddo enddo - !$OMP END DO - deallocate(buffer) - !$OMP END PARALLEL + !$OMP END PARALLEL DO else double precision :: get_two_e_integral @@ -105,6 +104,30 @@ subroutine gen_v_space(n1,n2,n3,n4,list1,list2,list3,list4,v) end +subroutine gen_v_space_chol(n1,n3,list1,list3,v,ldv) + + implicit none + + integer, intent(in) :: n1,n3,ldv + integer, intent(in) :: list1(n1),list3(n3) + double precision, intent(out) :: v(ldv,n1,n3) + + integer :: i1,i3,idx1,idx3,k + + !$OMP PARALLEL DO PRIVATE(i1,i3,idx1,idx3,k) + do i3=1,n3 + idx3 = list3(i3) + do i1=1,n1 + idx1 = list1(i1) + do k=1,cholesky_mo_num + v(k,i1,i3) = cholesky_mo_transp(k,idx1,idx3) + enddo + enddo + enddo + !$OMP END PARALLEL DO + +end + ! full BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] @@ -112,16 +135,17 @@ BEGIN_PROVIDER [double precision, cc_space_v, (mo_num,mo_num,mo_num,mo_num)] if (do_ao_cholesky) then integer :: i1,i2,i3,i4 double precision, allocatable :: buffer(:,:,:) + call set_multiple_levels_omp(.False.) !$OMP PARALLEL & - !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_ao_num) & + !$OMP SHARED(cc_space_v,mo_num,cholesky_mo_transp,cholesky_mo_num) & !$OMP PRIVATE(i1,i2,i3,i4,k,buffer)& !$OMP DEFAULT(NONE) allocate(buffer(mo_num,mo_num,mo_num)) !$OMP DO do i4 = 1, mo_num - call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_ao_num, 1.d0, & - cholesky_mo_transp, cholesky_ao_num, & - cholesky_mo_transp(1,1,i4), cholesky_ao_num, 0.d0, buffer, mo_num*mo_num) + call dgemm('T','N', mo_num*mo_num, mo_num, cholesky_mo_num, 1.d0, & + cholesky_mo_transp, cholesky_mo_num, & + cholesky_mo_transp(1,1,i4), cholesky_mo_num, 0.d0, buffer, mo_num*mo_num) do i2 = 1, mo_num do i3 = 1, mo_num do i1 = 1, mo_num @@ -166,7 +190,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_oooo, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oooo,1) + n2 = size(cc_space_v_oooo,2) + n3 = size(cc_space_v_oooo,3) + n4 = size(cc_space_v_oooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_oo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_oooo) + endif END_PROVIDER @@ -176,7 +233,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vooo, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vooo,1) + n2 = size(cc_space_v_vooo,2) + n3 = size(cc_space_v_vooo,3) + n4 = size(cc_space_v_vooo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vooo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_occ, cc_space_v_vooo) + endif END_PROVIDER @@ -186,7 +276,32 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovoo, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovoo,1) + n2 = size(cc_space_v_ovoo,2) + n3 = size(cc_space_v_ovoo,3) + n4 = size(cc_space_v_ovoo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovoo(i1,i2,i3,i4) = cc_space_v_vooo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_ovoo) + endif END_PROVIDER @@ -196,7 +311,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovo, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovo(i1,i2,i3,i4) = cc_space_v_vooo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nOa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_oovo) + endif END_PROVIDER @@ -206,7 +345,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ooov, (cc_nOa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovo,1) + n2 = size(cc_space_v_oovo,2) + n3 = size(cc_space_v_oovo,3) + n4 = size(cc_space_v_oovo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ooov(i1,i2,i3,i4) = cc_space_v_ovoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nOa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_ooov) + endif END_PROVIDER @@ -216,7 +379,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvoo, (cc_nVa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vvoo,1) + n2 = size(cc_space_v_vvoo,2) + n3 = size(cc_space_v_vvoo,3) + n4 = size(cc_space_v_vvoo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vo_chol, cholesky_mo_num, & + cc_space_v_vo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vvoo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nVa,cc_nOa,cc_nOa, cc_list_vir,cc_list_vir,cc_list_occ,cc_list_occ, cc_space_v_vvoo) + endif END_PROVIDER @@ -226,7 +422,40 @@ BEGIN_PROVIDER [double precision, cc_space_v_vovo, (cc_nVa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_vovo,1) + n2 = size(cc_space_v_vovo,2) + n3 = size(cc_space_v_vovo,3) + n4 = size(cc_space_v_vovo,4) + + double precision, allocatable :: buffer(:,:,:,:) + allocate(buffer(n1,n3,n2,n4)) + + call dgemm('T','N', n1*n3, n2*n4, cholesky_mo_num, 1.d0, & + cc_space_v_vv_chol, cholesky_mo_num, & + cc_space_v_oo_chol, cholesky_mo_num, 0.d0, buffer, n1*n3) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_vovo(i1,i2,i3,i4) = buffer(i1,i3,i2,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + deallocate(buffer) + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nVa,cc_nOa, cc_list_vir,cc_list_occ,cc_list_vir,cc_list_occ, cc_space_v_vovo) + endif END_PROVIDER @@ -236,7 +465,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_voov, (cc_nVa, cc_nOa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_voov,1) + n2 = size(cc_space_v_voov,2) + n3 = size(cc_space_v_voov,3) + n4 = size(cc_space_v_voov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_voov(i1,i2,i3,i4) = cc_space_v_vvoo(i1,i4,i3,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nVa,cc_nOa,cc_nOa,cc_nVa, cc_list_vir,cc_list_occ,cc_list_occ,cc_list_vir, cc_space_v_voov) + endif END_PROVIDER @@ -246,7 +499,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovvo, (cc_nOa, cc_nVa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovvo,1) + n2 = size(cc_space_v_ovvo,2) + n3 = size(cc_space_v_ovvo,3) + n4 = size(cc_space_v_ovvo,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovvo(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i2,i1,i4) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nVa,cc_nOa, cc_list_occ,cc_list_vir,cc_list_vir,cc_list_occ, cc_space_v_ovvo) + endif END_PROVIDER @@ -256,7 +533,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_ovov, (cc_nOa, cc_nVa, cc_nOa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_ovov,1) + n2 = size(cc_space_v_ovov,2) + n3 = size(cc_space_v_ovov,3) + n4 = size(cc_space_v_ovov,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_ovov(i1,i2,i3,i4) = cc_space_v_vovo(i2,i1,i4,i3) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nVa,cc_nOa,cc_nVa, cc_list_occ,cc_list_vir,cc_list_occ,cc_list_vir, cc_space_v_ovov) + endif END_PROVIDER @@ -266,7 +567,31 @@ BEGIN_PROVIDER [double precision, cc_space_v_oovv, (cc_nOa, cc_nOa, cc_nVa, cc_n implicit none - call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + if (do_ao_cholesky) then + + integer :: i1, i2, i3, i4 + integer :: n1, n2, n3, n4 + + n1 = size(cc_space_v_oovv,1) + n2 = size(cc_space_v_oovv,2) + n3 = size(cc_space_v_oovv,3) + n4 = size(cc_space_v_oovv,4) + + !$OMP PARALLEL DO PRIVATE(i1,i2,i3,i4) COLLAPSE(2) + do i4 = 1, n4 + do i3 = 1, n3 + do i2 = 1, n2 + do i1 = 1, n1 + cc_space_v_oovv(i1,i2,i3,i4) = cc_space_v_vvoo(i3,i4,i1,i2) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + else + call gen_v_space(cc_nOa,cc_nOa,cc_nVa,cc_nVa, cc_list_occ,cc_list_occ,cc_list_vir,cc_list_vir, cc_space_v_oovv) + endif END_PROVIDER @@ -320,6 +645,38 @@ BEGIN_PROVIDER [double precision, cc_space_v_vvvv, (cc_nVa, cc_nVa, cc_nVa, cc_n END_PROVIDER +BEGIN_PROVIDER [double precision, cc_space_v_vv_chol, (cholesky_mo_num, cc_nVa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nVa, cc_list_vir, cc_list_vir, cc_space_v_vv_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_vo_chol, (cholesky_mo_num, cc_nVa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nVa, cc_nOa, cc_list_vir, cc_list_occ, cc_space_v_vo_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_ov_chol, (cholesky_mo_num, cc_nOa, cc_nVa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nVa, cc_list_occ, cc_list_vir, cc_space_v_ov_chol, cholesky_mo_num) + +END_PROVIDER + +BEGIN_PROVIDER [double precision, cc_space_v_oo_chol, (cholesky_mo_num, cc_nOa, cc_nOa)] + + implicit none + + call gen_v_space_chol(cc_nOa, cc_nOa, cc_list_occ, cc_list_occ, cc_space_v_oo_chol, cholesky_mo_num) + +END_PROVIDER + ! ppqq BEGIN_PROVIDER [double precision, cc_space_v_ppqq, (cc_n_mo, cc_n_mo)] diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f index f268df04..db3d5c99 100644 --- a/src/utils_trust_region/rotation_matrix_iterative.irp.f +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -73,7 +73,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'R' !do i = 1, m - ! write(*,'(10(E12.5))') R(i,:) + ! write(*,'(10(ES12.5))') R(i,:) !enddo do i = 1, m @@ -82,7 +82,7 @@ subroutine rotation_matrix_iterative(m,X,R) !print*,'RRT' !do i = 1, m - ! write(*,'(10(E12.5))') RRT(i,:) + ! write(*,'(10(ES12.5))') RRT(i,:) !enddo max_elem = 0d0 diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index b7dcf875..e98bbfb7 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -336,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,ES12.5,a,ES12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -345,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - !write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,ES12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -414,7 +414,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,ES12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then From c3164f34614b79fd6a603baf210073fb1a0a74ff Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 15 Sep 2023 11:48:34 +0200 Subject: [PATCH 291/337] erased Abdallahs commit --- external/ezfio | 2 +- external/irpf90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index ed1df9f3..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit ed1df9f3c1f51752656ca98da5693a4119add05c +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/external/irpf90 b/external/irpf90 index 33ca5e10..0007f72f 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 33ca5e1018f3bbb5e695e6ee558f5dac0753b271 +Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 From 6ed31002d2b84d2f4db905f235262f026f139a2a Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 15 Sep 2023 12:01:18 +0200 Subject: [PATCH 292/337] added NEED in cas tc bi --- src/casscf_tc_bi/NEED | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 src/casscf_tc_bi/NEED diff --git a/src/casscf_tc_bi/NEED b/src/casscf_tc_bi/NEED new file mode 100644 index 00000000..b4c958e6 --- /dev/null +++ b/src/casscf_tc_bi/NEED @@ -0,0 +1,3 @@ +determinants +tc_bi_ortho +fci_tc_bi From babf1c0da43a2cae0c6b84699b64b020650f9c9a Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 16 Sep 2023 00:28:18 +0200 Subject: [PATCH 293/337] noL tested for Ne and O --- src/bi_ort_ints/no_dressing_energy.irp.f | 66 +++++++ src/bi_ort_ints/no_dressing_naive.irp.f | 20 +-- src/bi_ort_ints/no_dressing_v0.irp.f | 30 +--- src/bi_ort_ints/one_e_bi_ort.irp.f | 14 +- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 2 +- src/bi_ort_ints/total_twoe_pot.irp.f | 13 +- src/tc_bi_ortho/slater_tc_opt.irp.f | 48 ++--- src/tc_bi_ortho/slater_tc_opt_diag.irp.f | 118 ++++++------ src/tc_bi_ortho/slater_tc_opt_double.irp.f | 94 ++++++---- src/tc_bi_ortho/slater_tc_opt_single.irp.f | 178 ++++++++++--------- src/tc_bi_ortho/slater_tc_slow.irp.f | 89 ++++------ src/tc_bi_ortho/tc_hmat.irp.f | 33 ++-- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 3 + 13 files changed, 409 insertions(+), 299 deletions(-) create mode 100644 src/bi_ort_ints/no_dressing_energy.irp.f diff --git a/src/bi_ort_ints/no_dressing_energy.irp.f b/src/bi_ort_ints/no_dressing_energy.irp.f new file mode 100644 index 00000000..30b2fa04 --- /dev/null +++ b/src/bi_ort_ints/no_dressing_energy.irp.f @@ -0,0 +1,66 @@ + +! --- + +BEGIN_PROVIDER [double precision, energy_1e_noL_HF] + + implicit none + integer :: i + + PROVIDE mo_bi_ortho_tc_one_e + + energy_1e_noL_HF = 0.d0 + do i = 1, elec_beta_num + energy_1e_noL_HF += mo_bi_ortho_tc_one_e(i,i) + enddo + do i = 1, elec_alpha_num + energy_1e_noL_HF += mo_bi_ortho_tc_one_e(i,i) + enddo + + print*, "energy_1e_noL_HF = ", energy_1e_noL_HF + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, energy_2e_noL_HF] + + implicit none + integer :: i, j + + PROVIDE mo_bi_ortho_tc_two_e + + energy_2e_noL_HF = 0.d0 + ! down-down & down-down + do i = 1, elec_beta_num + do j = 1, elec_beta_num + energy_2e_noL_HF += (mo_bi_ortho_tc_two_e(i,j,i,j) - mo_bi_ortho_tc_two_e(j,i,i,j)) + enddo + enddo + ! down-down & up-up + do i = 1, elec_beta_num + do j = 1, elec_alpha_num + energy_2e_noL_HF += mo_bi_ortho_tc_two_e(i,j,i,j) + enddo + enddo + ! up-up & down-down + do i = 1, elec_alpha_num + do j = 1, elec_beta_num + energy_2e_noL_HF += mo_bi_ortho_tc_two_e(i,j,i,j) + enddo + enddo + ! up-up & up-up + do i = 1, elec_alpha_num + do j = 1, elec_alpha_num + energy_2e_noL_HF += (mo_bi_ortho_tc_two_e(i,j,i,j) - mo_bi_ortho_tc_two_e(j,i,i,j)) + enddo + enddo + + ! 0.5 x is in the Slater-Condon rules and not in the integrals + energy_2e_noL_HF = 0.5d0 * energy_2e_noL_HF + + print*, "energy_2e_noL_HF = ", energy_2e_noL_HF + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/no_dressing_naive.irp.f b/src/bi_ort_ints/no_dressing_naive.irp.f index a0c488b3..abc80632 100644 --- a/src/bi_ort_ints/no_dressing_naive.irp.f +++ b/src/bi_ort_ints/no_dressing_naive.irp.f @@ -89,7 +89,7 @@ BEGIN_PROVIDER [double precision, noL_0e_naive] !$OMP END DO !$OMP END PARALLEL - noL_0e_naive = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e_naive = -1.d0 * (sum(tmp)) / 6.d0 deallocate(tmp) @@ -182,9 +182,8 @@ BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] , j, sigma_j, s, sigma_s, i, sigma_i & , I_pij_jsi) - ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) - 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -254,9 +253,8 @@ BEGIN_PROVIDER [double precision, noL_1e_naive, (mo_num, mo_num)] , j, sigma_j, s, sigma_s, i, sigma_i & , I_pij_jsi) - ! x (-1) because integrals are over -L ! x 0.5 because we consider 0.5 (up + down) - noL_1e_naive(p,s) = noL_1e_naive(p,s) + 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) + noL_1e_naive(p,s) = noL_1e_naive(p,s) - 0.25d0 * (I_pij_sji - I_pij_sij + I_pij_jis - I_pij_ijs + I_pij_isj - I_pij_jsi) enddo ! j enddo ! i enddo ! s @@ -335,9 +333,8 @@ BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num) , t, sigma_t, s, sigma_s, i, sigma_i & , I_ipq_tsi) - ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) - 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -389,9 +386,8 @@ BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num) , t, sigma_t, s, sigma_s, i, sigma_i & , I_ipq_tsi) - ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) - 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -443,9 +439,8 @@ BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num) , t, sigma_t, s, sigma_s, i, sigma_i & , I_ipq_tsi) - ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) - 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q @@ -497,9 +492,8 @@ BEGIN_PROVIDER [double precision, noL_2e_naive, (mo_num, mo_num, mo_num, mo_num) , t, sigma_t, s, sigma_s, i, sigma_i & , I_ipq_tsi) - ! x (-1) because integrals are over -L ! x 0.25 because we consider 0.25 (up-up + up-down + down-up + down-down) - noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) + 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) + noL_2e_naive(p,q,s,t) = noL_2e_naive(p,q,s,t) - 0.125d0 * (I_ipq_ist - I_ipq_sit - I_ipq_tsi) enddo ! i enddo ! p enddo ! q diff --git a/src/bi_ort_ints/no_dressing_v0.irp.f b/src/bi_ort_ints/no_dressing_v0.irp.f index efcf51db..3b252f8e 100644 --- a/src/bi_ort_ints/no_dressing_v0.irp.f +++ b/src/bi_ort_ints/no_dressing_v0.irp.f @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, noL_0e] !$OMP END DO !$OMP END PARALLEL - noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (sum(tmp)) / 6.d0 deallocate(tmp) @@ -114,7 +114,7 @@ BEGIN_PROVIDER [double precision, noL_0e] !$OMP END DO !$OMP END PARALLEL - noL_0e = -1.d0 * (-sum(tmp)) / 6.d0 + noL_0e = -1.d0 * (sum(tmp)) / 6.d0 deallocate(tmp) @@ -131,12 +131,6 @@ END_PROVIDER BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] - BEGIN_DOC - ! - ! x (-1) because integrals are over -L - ! - END_DOC - implicit none integer :: p, s, i, j double precision :: I_pij_sij, I_pij_isj, I_pij_ijs, I_pij_sji, I_pij_jsi, I_pij_jis @@ -167,7 +161,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -197,7 +191,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) - (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -211,7 +205,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -221,7 +215,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -241,12 +235,6 @@ END_PROVIDER BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] - BEGIN_DOC - ! - ! x (-1) because integrals are over -L - ! - END_DOC - implicit none integer :: p, q, s, t, i double precision :: I_ipq_sit, I_ipq_tsi, I_ipq_ist @@ -276,7 +264,7 @@ BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo enddo enddo @@ -306,7 +294,7 @@ BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i do i = elec_beta_num+1, elec_alpha_num @@ -315,7 +303,7 @@ BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) - 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) enddo ! i enddo ! p diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 7c2ac860..0ecc2a84 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -53,12 +53,14 @@ END_PROVIDER BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_x , (mo_num,mo_num)] &BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_y , (mo_num,mo_num)] &BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_z , (mo_num,mo_num)] - BEGIN_DOC - ! array of the integrals of Left MO_i * x Right MO_j - ! array of the integrals of Left MO_i * y Right MO_j - ! array of the integrals of Left MO_i * z Right MO_j - END_DOC - implicit none + + BEGIN_DOC + ! array of the integrals of Left MO_i * x Right MO_j + ! array of the integrals of Left MO_i * y Right MO_j + ! array of the integrals of Left MO_i * z Right MO_j + END_DOC + + implicit none call ao_to_mo_bi_ortho( & ao_dipole_x, & diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index cb5c08cf..56d8146f 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -126,7 +126,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! END_DOC diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 49f613b5..37a31a51 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -258,7 +258,8 @@ BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, if(noL_standard) then PROVIDE noL_2e - mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + noL_2e + ! x 2 because of the Slater-Condon rules convention + mo_bi_ortho_tc_two_e = mo_bi_ortho_tc_two_e + 2.d0 * noL_2e FREE noL_2e endif @@ -272,9 +273,11 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num)] BEGIN_DOC - ! mo_bi_ortho_tc_two_e_jj(i,j) = J_ij = + ! + ! mo_bi_ortho_tc_two_e_jj (i,j) = J_ij = ! mo_bi_ortho_tc_two_e_jj_exchange(i,j) = K_ij = - ! mo_bi_ortho_tc_two_e_jj_anti(i,j) = J_ij - K_ij + ! mo_bi_ortho_tc_two_e_jj_anti (i,j) = J_ij - K_ij + ! END_DOC implicit none @@ -285,9 +288,9 @@ END_PROVIDER do i = 1, mo_num do j = 1, mo_num - mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i) + mo_bi_ortho_tc_two_e_jj (i,j) = mo_bi_ortho_tc_two_e(j,i,j,i) mo_bi_ortho_tc_two_e_jj_exchange(i,j) = mo_bi_ortho_tc_two_e(i,j,j,i) - mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j) + mo_bi_ortho_tc_two_e_jj_anti (i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j) enddo enddo diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index c3a5ba6b..f7c9b7b3 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -55,7 +55,7 @@ subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) double precision, intent(out) :: htot - double precision :: hmono, htwoe, hthree + double precision :: hmono, htwoe, hthree call htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) @@ -90,26 +90,33 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, hthree = 0.d0 call get_excitation_degree(key_i, key_j, degree, Nint) - if(.not.pure_three_body_h_tc)then - if(degree.gt.2) return - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) - endif + + if(.not.pure_three_body_h_tc) then + + if(degree .gt. 2) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + else - if(degree.gt.3) return - if(degree == 0) then - call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) - else if (degree == 1) then - call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) - else if(degree == 2) then - call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) - else - call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) - endif + + if(degree .gt. 3) return + + if(degree == 0) then + call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) + else if (degree == 1) then + call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i , hmono, htwoe, hthree, htot) + else if(degree == 2) then + call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + else + call triple_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) + endif + endif if(degree==0) then @@ -161,3 +168,4 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) end ! --- + diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 367d90dd..cc1a0603 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,7 +7,9 @@ &BEGIN_PROVIDER [ double precision, ref_tc_energy_3e] BEGIN_DOC + ! ! Various component of the TC energy for the reference "HF" Slater determinant + ! END_DOC implicit none @@ -41,7 +43,9 @@ END_PROVIDER subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot) BEGIN_DOC + ! ! Computes $\langle i|H|i \rangle$. + ! END_DOC implicit none @@ -63,7 +67,7 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, nexc(1) = 0 nexc(2) = 0 - do i=1,Nint + do i = 1, Nint hole(i,1) = xor(det_in(i,1),ref_bitmask(i,1)) hole(i,2) = xor(det_in(i,2),ref_bitmask(i,2)) particle(i,1) = iand(hole(i,1),det_in(i,1)) @@ -124,6 +128,7 @@ end subroutine ac_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) BEGIN_DOC + ! ! Routine that computes one- and two-body energy corresponding ! ! to the ADDITION of an electron in an orbital 'iorb' of spin 'ispin' @@ -133,6 +138,7 @@ subroutine ac_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) ! in output, the determinant key is changed by the ADDITION of that electron ! ! and the quantities hmono,htwoe,hthree are INCREMENTED + ! END_DOC use bitmasks @@ -188,8 +194,8 @@ subroutine ac_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) enddo if(three_body_h_tc .and. (elec_num.gt.2) .and. three_e_3_idx_term) then - !!!!! 3-e part + !! same-spin/same-spin do j = 1, na jj = occ(j,ispin) @@ -220,16 +226,19 @@ subroutine ac_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) enddo endif - na = na+1 + na = na + 1 end ! --- -subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) +subroutine a_tc_operator(iorb, ispin, key, hmono, htwoe, hthree, Nint, na, nb) + use bitmasks implicit none + BEGIN_DOC + ! ! Routine that computes one- and two-body energy corresponding ! ! to the REMOVAL of an electron in an orbital 'iorb' of spin 'ispin' @@ -239,17 +248,19 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) ! in output, the determinant key is changed by the REMOVAL of that electron ! ! and the quantities hmono,htwoe,hthree are INCREMENTED + ! END_DOC - integer, intent(in) :: iorb, ispin, Nint - integer, intent(inout) :: na, nb + + integer, intent(in) :: iorb, ispin, Nint + integer, intent(inout) :: na, nb integer(bit_kind), intent(inout) :: key(Nint,2) - double precision, intent(inout) :: hmono,htwoe,hthree + double precision, intent(inout) :: hmono,htwoe,hthree - double precision :: direct_int, exchange_int - integer :: occ(Nint*bit_kind_size,2) - integer :: other_spin - integer :: k,l,i,jj,mm,j,m - integer :: tmp(2) + double precision :: direct_int, exchange_int + integer :: occ(Nint*bit_kind_size,2) + integer :: other_spin + integer :: k, l, i, jj, mm, j, m + integer :: tmp(2) ASSERT (iorb > 0) ASSERT (ispin > 0) @@ -269,60 +280,63 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb) ! Same spin - do i=1,na - htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb) + do i = 1, na + htwoe = htwoe - mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb) enddo ! Opposite spin - do i=1,nb - htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) + do i = 1, nb + htwoe = htwoe - mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then - !!!!! 3-e part - !! same-spin/same-spin - do j = 1, na - jj = occ(j,ispin) - do m = j+1, na - mm = occ(m,ispin) - hthree -= three_e_diag_parrallel_spin_prov(mm,jj,iorb) + if(three_body_h_tc .and. elec_num.gt.2 .and. three_e_3_idx_term) then + !!!!! 3-e part + + !! same-spin/same-spin + do j = 1, na + jj = occ(j,ispin) + do m = j+1, na + mm = occ(m,ispin) + hthree -= three_e_diag_parrallel_spin_prov(mm,jj,iorb) + enddo enddo - enddo - !! same-spin/oposite-spin - do j = 1, na - jj = occ(j,ispin) - do m = 1, nb - mm = occ(m,other_spin) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - hthree -= (direct_int - exchange_int) - enddo - enddo - !! oposite-spin/opposite-spin + !! same-spin/oposite-spin + do j = 1, na + jj = occ(j,ispin) + do m = 1, nb + mm = occ(m,other_spin) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + hthree -= (direct_int - exchange_int) + enddo + enddo + !! oposite-spin/opposite-spin do j = 1, nb - jj = occ(j,other_spin) - do m = j+1, nb - mm = occ(m,other_spin) - direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR - hthree -= (direct_int - exchange_int) - enddo + jj = occ(j,other_spin) + do m = j+1, nb + mm = occ(m,other_spin) + direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR + hthree -= (direct_int - exchange_int) + enddo enddo endif end +! --- subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot) - implicit none + BEGIN_DOC ! Computes $\langle i|H|i \rangle$. WITHOUT ANY CONTRIBUTIONS FROM 3E TERMS END_DOC - integer,intent(in) :: Nint - integer(bit_kind),intent(in) :: det_in(Nint,2) - double precision, intent(out) :: htot - double precision :: hmono,htwoe + implicit none + integer, intent(in) :: Nint + integer(bit_kind), intent(in) :: det_in(Nint,2) + double precision, intent(out) :: htot + double precision :: hmono, htwoe integer(bit_kind) :: hole(Nint,2) integer(bit_kind) :: particle(Nint,2) integer :: i, nexc(2), ispin @@ -349,15 +363,15 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot) nexc(2) = nexc(2) + popcnt(hole(i,2)) enddo - if (nexc(1)+nexc(2) == 0) then + if(nexc(1)+nexc(2) == 0) then hmono = ref_tc_energy_1e htwoe = ref_tc_energy_2e - htot = ref_tc_energy_tot + htot = ref_tc_energy_tot return endif !call debug_det(det_in,Nint) - integer :: tmp(2) + integer :: tmp(2) !DIR$ FORCEINLINE call bitstring_to_list_ab(particle, occ_particle, tmp, Nint) ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha @@ -367,8 +381,8 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot) ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha ASSERT (tmp(2) == nexc(2)) ! Number of holes beta - det_tmp = ref_bitmask + hmono = ref_tc_energy_1e htwoe = ref_tc_energy_2e do ispin=1,2 diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index bd59583f..4067473c 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -1,4 +1,6 @@ +! --- + subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC @@ -29,55 +31,77 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree = 0.d0 htot = 0.d0 - if(degree.ne.2)then - return + if(degree .ne. 2) then + return endif - integer :: degree_i,degree_j - call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int) - call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int) + + integer :: degree_i, degree_j + call get_excitation_degree(ref_bitmask, key_i, degree_i, N_int) + call get_excitation_degree(ref_bitmask, key_j, degree_j, N_int) call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) - if(s1.ne.s2)then - ! opposite spin two-body - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord.and.three_e_5_idx_term)then - if(degree_i>degree_j)then - call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) - else - call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) + if(s1 .ne. s2) then + ! opposite spin two-body + + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + + if(three_body_h_tc .and. (elec_num .gt. 2)) then + ! add 3-e term + + if(.not.double_normal_ord .and. three_e_5_idx_term) then + ! 5-idx approx + + if(degree_i > degree_j) then + call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) + else + call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) + endif + + elseif(double_normal_ord) then + ! noL a la Manu + + htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif - elseif(double_normal_ord)then - htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) - endif endif + else - ! same spin two-body - ! direct terms - htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - ! exchange terms - htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(three_body_h_tc.and.elec_num.gt.2)then - if(.not.double_normal_ord.and.three_e_5_idx_term)then - if(degree_i>degree_j)then - call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) - else - call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) - endif - elseif(double_normal_ord)then - htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) - htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) + ! same spin two-body + + ! direct terms + htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) + + ! exchange terms + htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) + + if(three_body_h_tc .and. (elec_num .gt. 2)) then + ! add 3-e term + + if(.not.double_normal_ord.and.three_e_5_idx_term)then + ! 5-idx approx + + if(degree_i > degree_j) then + call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) + else + call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) + endif + + elseif(double_normal_ord) then + ! noL a la Manu + + htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) + htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) + endif endif - endif endif + hthree *= phase htwoe *= phase - htot = htwoe + hthree + htot = htwoe + hthree end - +! --- subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) implicit none diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index ddcd1e66..81bf69f4 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -1,12 +1,16 @@ +! --- + +subroutine single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) -subroutine single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, hthree, htot) BEGIN_DOC + ! ! for single excitation ONLY FOR ONE- AND TWO-BODY TERMS !! !! WARNING !! ! ! Non hermitian !! + ! END_DOC use bitmasks @@ -31,93 +35,105 @@ subroutine single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe htwoe = 0.d0 hthree = 0.d0 htot = 0.d0 + call get_excitation_degree(key_i, key_j, degree, Nint) - if(degree.ne.1)then - return + if(degree .ne. 1) then + return endif + call bitstring_to_list_ab(key_i, occ, Ne, Nint) - call get_single_excitation(key_i, key_j, exc, phase, Nint) - call decode_exc(exc,1,h1,p1,h2,p2,s1,s2) - call get_single_excitation_from_fock_tc(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,hthree,htot) -end - - -subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,htwoe,hthree,htot) - use bitmasks - implicit none - integer,intent(in) :: h,p,spin - double precision, intent(in) :: phase - integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2) - double precision, intent(out) :: hmono,htwoe,hthree,htot - integer(bit_kind) :: differences(N_int,2) - integer(bit_kind) :: hole(N_int,2) - integer(bit_kind) :: partcl(N_int,2) - integer :: occ_hole(N_int*bit_kind_size,2) - integer :: occ_partcl(N_int*bit_kind_size,2) - integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) - integer :: i0,i - double precision :: buffer_c(mo_num),buffer_x(mo_num) - do i=1, mo_num - buffer_c(i) = tc_2e_3idx_coulomb_integrals(i,p,h) - buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h) - enddo - do i = 1, N_int - differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1)) - differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2)) - hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1)) - hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2)) - partcl(i,1) = iand(differences(i,1),key_i(i,1)) - partcl(i,2) = iand(differences(i,2),key_i(i,2)) - enddo - call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int) - call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int) - hmono = mo_bi_ortho_tc_one_e(p,h) - htwoe = fock_op_2_e_tc_closed_shell(p,h) - ! holes :: direct terms - do i0 = 1, n_occ_ab_hole(1) - i = occ_hole(i0,1) - htwoe -= buffer_c(i) - enddo - do i0 = 1, n_occ_ab_hole(2) - i = occ_hole(i0,2) - htwoe -= buffer_c(i) - enddo - - ! holes :: exchange terms - do i0 = 1, n_occ_ab_hole(spin) - i = occ_hole(i0,spin) - htwoe += buffer_x(i) - enddo - - ! particles :: direct terms - do i0 = 1, n_occ_ab_partcl(1) - i = occ_partcl(i0,1) - htwoe += buffer_c(i) - enddo - do i0 = 1, n_occ_ab_partcl(2) - i = occ_partcl(i0,2) - htwoe += buffer_c(i) - enddo - - ! particles :: exchange terms - do i0 = 1, n_occ_ab_partcl(spin) - i = occ_partcl(i0,spin) - htwoe -= buffer_x(i) - enddo - hthree = 0.d0 - if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then - call three_comp_fock_elem(key_i,h,p,spin,hthree) - endif - - - htwoe = htwoe * phase - hmono = hmono * phase - hthree = hthree * phase - htot = htwoe + hmono + hthree + call decode_exc(exc, 1, h1, p1, h2, p2, s1, s2) + call get_single_excitation_from_fock_tc(key_i, key_j, h1, p1, s1, phase, hmono, htwoe, hthree, htot) end +! --- + +subroutine get_single_excitation_from_fock_tc(key_i, key_j, h, p, spin, phase, hmono, htwoe, hthree, htot) + + use bitmasks + + implicit none + integer, intent(in) :: h, p, spin + double precision, intent(in) :: phase + integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2) + double precision, intent(out) :: hmono, htwoe, hthree, htot + + integer(bit_kind) :: differences(N_int,2) + integer(bit_kind) :: hole(N_int,2) + integer(bit_kind) :: partcl(N_int,2) + integer :: occ_hole(N_int*bit_kind_size,2) + integer :: occ_partcl(N_int*bit_kind_size,2) + integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2) + integer :: i0,i + double precision :: buffer_c(mo_num),buffer_x(mo_num) + + do i = 1, mo_num + buffer_c(i) = tc_2e_3idx_coulomb_integrals (i,p,h) + buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h) + enddo + + do i = 1, N_int + differences(i,1) = xor(key_i(i,1), ref_closed_shell_bitmask(i,1)) + differences(i,2) = xor(key_i(i,2), ref_closed_shell_bitmask(i,2)) + hole (i,1) = iand(differences(i,1), ref_closed_shell_bitmask(i,1)) + hole (i,2) = iand(differences(i,2), ref_closed_shell_bitmask(i,2)) + partcl (i,1) = iand(differences(i,1), key_i(i,1)) + partcl (i,2) = iand(differences(i,2), key_i(i,2)) + enddo + + call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int) + call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int) + hmono = mo_bi_ortho_tc_one_e(p,h) + htwoe = fock_op_2_e_tc_closed_shell(p,h) + + ! holes :: direct terms + do i0 = 1, n_occ_ab_hole(1) + i = occ_hole(i0,1) + htwoe -= buffer_c(i) + enddo + do i0 = 1, n_occ_ab_hole(2) + i = occ_hole(i0,2) + htwoe -= buffer_c(i) + enddo + + ! holes :: exchange terms + do i0 = 1, n_occ_ab_hole(spin) + i = occ_hole(i0,spin) + htwoe += buffer_x(i) + enddo + + ! particles :: direct terms + do i0 = 1, n_occ_ab_partcl(1) + i = occ_partcl(i0,1) + htwoe += buffer_c(i) + enddo + do i0 = 1, n_occ_ab_partcl(2) + i = occ_partcl(i0,2) + htwoe += buffer_c(i) + enddo + + ! particles :: exchange terms + do i0 = 1, n_occ_ab_partcl(spin) + i = occ_partcl(i0,spin) + htwoe -= buffer_x(i) + enddo + + hthree = 0.d0 + if (three_body_h_tc .and. elec_num.gt.2 .and. three_e_4_idx_term) then + call three_comp_fock_elem(key_i, h, p, spin, hthree) + endif + + htwoe = htwoe * phase + hmono = hmono * phase + hthree = hthree * phase + htot = htwoe + hmono + hthree + +end + +! --- + subroutine three_comp_fock_elem(key_i,h_fock,p_fock,ispin_fock,hthree) implicit none integer,intent(in) :: h_fock,p_fock,ispin_fock diff --git a/src/tc_bi_ortho/slater_tc_slow.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f index 83a56d2d..b1751069 100644 --- a/src/tc_bi_ortho/slater_tc_slow.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -81,8 +81,14 @@ subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, endif htot = hmono + htwoe + hthree + if(degree==0) then htot += nuclear_repulsion + + if(noL_standard) then + PROVIDE noL_0e + htot += noL_0e + endif endif end @@ -92,7 +98,9 @@ end subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC - ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! + ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS + ! END_DOC use bitmasks @@ -108,78 +116,53 @@ subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) PROVIDE mo_bi_ortho_tc_two_e -! PROVIDE mo_two_e_integrals_tc_int_in_map mo_bi_ortho_tc_two_e -! -! PROVIDE mo_integrals_erf_map core_energy nuclear_repulsion core_bitmask -! PROVIDE core_fock_operator -! -! PROVIDE j1b_gauss + hmono = 0.d0 + htwoe = 0.d0 + htot = 0.d0 -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! hmono = core_energy - nuclear_repulsion -! else - call bitstring_to_list_ab(key_i, occ, Ne, Nint) - hmono = 0.d0 -! endif - htwoe= 0.d0 - htot = 0.d0 + call bitstring_to_list_ab(key_i, occ, Ne, Nint) do ispin = 1, 2 - do i = 1, Ne(ispin) ! - ii = occ(i,ispin) - hmono += mo_bi_ortho_tc_one_e(ii,ii) - -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! hmono += core_fock_operator(ii,ii) ! add the usual Coulomb - Exchange from the core -! endif - enddo + do i = 1, Ne(ispin) + ii = occ(i,ispin) + hmono += mo_bi_ortho_tc_one_e(ii,ii) + enddo enddo - - ! alpha/beta two-body - ispin = 1 - jspin = 2 - do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) + ! alpha/beta two-body + ispin = 1 + jspin = 2 + do i = 1, Ne(ispin) ! electron 1 (so it can be associated to mu(r1)) ii = occ(i,ispin) do j = 1, Ne(jspin) ! electron 2 - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii) enddo - enddo + enddo - ! alpha/alpha two-body - do i = 1, Ne(ispin) + ! alpha/alpha two-body + do i = 1, Ne(ispin) ii = occ(i,ispin) do j = i+1, Ne(ispin) - jj = occ(j,ispin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + jj = occ(j,ispin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) enddo - enddo + enddo - ! beta/beta two-body - do i = 1, Ne(jspin) + ! beta/beta two-body + do i = 1, Ne(jspin) ii = occ(i,jspin) do j = i+1, Ne(jspin) - jj = occ(j,jspin) - htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) + jj = occ(j,jspin) + htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii) enddo - enddo + enddo + htot = hmono + htwoe end - +! --- subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index ceabf853..5fb0a620 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -1,10 +1,14 @@ - BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)] +! --- + +BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)] BEGIN_DOC + ! ! htilde_matrix_elmt_bi_ortho(j,i) = ! ! WARNING !!!!!!!!! IT IS NOT HERMITIAN !!!!!!!!! + ! END_DOC implicit none @@ -17,28 +21,33 @@ j = 1 call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & - !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) - do i = 1, N_det - do j = 1, N_det - ! < J |Htilde | I > - call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & + !$OMP SHARED (N_det, psi_det, N_int, htilde_matrix_elmt_bi_ortho) + do i = 1, N_det + do j = 1, N_det + ! < J |Htilde | I > + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - htilde_matrix_elmt_bi_ortho(j,i) = htot - enddo + htilde_matrix_elmt_bi_ortho(j,i) = htot enddo - !$OMP END PARALLEL DO + enddo + !$OMP END PARALLEL DO END_PROVIDER ! --- BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho_tranp, (N_det,N_det)] - implicit none - integer ::i,j + + implicit none + integer ::i,j + do i = 1, N_det do j = 1, N_det htilde_matrix_elmt_bi_ortho_tranp(j,i) = htilde_matrix_elmt_bi_ortho(i,j) enddo enddo END_PROVIDER + +! --- + diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index b55419a8..f515e31d 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -557,6 +557,8 @@ subroutine test_no_1() print*, ' accu (%) = ', 100.d0*accu/norm + PROVIDE energy_1e_noL_HF + return end @@ -572,6 +574,7 @@ subroutine test_no_2() PROVIDE noL_2e_naive PROVIDE noL_2e + PROVIDE energy_2e_noL_HF thr = 1d-8 From 06871d4041fbd7229186f4bc59b489fb5616296b Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sat, 16 Sep 2023 14:31:52 +0200 Subject: [PATCH 294/337] DGEMM for noL_2e --- src/bi_ort_ints/no_dressing.irp.f | 624 +++++++++++++++++++ src/bi_ort_ints/no_dressing_v0.irp.f | 324 ---------- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 43 +- src/bi_ortho_mos/mos_rl.irp.f | 1 - src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 10 +- 5 files changed, 640 insertions(+), 362 deletions(-) create mode 100644 src/bi_ort_ints/no_dressing.irp.f delete mode 100644 src/bi_ort_ints/no_dressing_v0.irp.f diff --git a/src/bi_ort_ints/no_dressing.irp.f b/src/bi_ort_ints/no_dressing.irp.f new file mode 100644 index 00000000..cdebca07 --- /dev/null +++ b/src/bi_ort_ints/no_dressing.irp.f @@ -0,0 +1,624 @@ + +! --- + +BEGIN_PROVIDER [double precision, noL_0e] + + implicit none + integer :: i, j, k + double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji + double precision :: t0, t1 + double precision, allocatable :: tmp(:) + + call wall_time(t0) + print*, " Providing noL_0e ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp(elec_beta_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik) & + !$OMP SHARED (elec_beta_num, tmp) + + !$OMP DO + do i = 1, elec_beta_num + + tmp(i) = 0.d0 + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -1.d0 * (sum(tmp)) / 6.d0 + + deallocate(tmp) + + else + + allocate(tmp(elec_alpha_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, & + !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik, & + !$OMP I_ijk_jki, I_ijk_ikj, I_ijk_kji) & + !$OMP SHARED (elec_beta_num, elec_alpha_num, tmp) + + !$OMP DO + do i = 1, elec_beta_num + + tmp(i) = 0.d0 + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + + tmp(i) = 0.d0 + do j = elec_beta_num+1, elec_alpha_num + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + + tmp(i) = tmp(i) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik + enddo ! k + enddo ! j + + do j = 1, elec_beta_num + do k = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + enddo ! k + + do k = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) + call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) + call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) + call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) + call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) + + tmp(i) = tmp(i) + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + enddo ! k + enddo ! j + enddo ! i + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -1.d0 * (sum(tmp)) / 6.d0 + + deallocate(tmp) + + endif + + call wall_time(t1) + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 + + print*, " noL_0e = ", noL_0e + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] + + implicit none + integer :: p, s, i, j + double precision :: I_pij_sij, I_pij_isj, I_pij_ijs, I_pij_sji, I_pij_jsi, I_pij_jis + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing noL_1e ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji) & + !$OMP SHARED (mo_num, elec_beta_num, noL_1e) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + noL_1e(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, s, i, j, & + !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & + !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + noL_1e(p,s) = 0.d0 + do i = 1, elec_beta_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + call give_integrals_3_body_bi_ort(p, i, j, j, s, i, I_pij_jsi) + call give_integrals_3_body_bi_ort(p, i, j, j, i, s, I_pij_jis) + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + + noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + enddo ! j + + do j = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) + call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) + call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) + call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) + + noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + enddo ! j + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, noL_2e_v0, (mo_num, mo_num, mo_num, mo_num)] + + implicit none + integer :: p, q, s, t, i + double precision :: I_ipq_sit, I_ipq_tsi, I_ipq_ist + double precision :: t0, t1 + + call wall_time(t0) + print*, " Providing noL_2e_v0 ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, noL_2e_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + noL_2e_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + noL_2e_v0(p,q,s,t) = noL_2e_v0(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (p, q, s, t, i, & + !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e_v0) + + !$OMP DO COLLAPSE(4) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + noL_2e_v0(p,q,s,t) = 0.d0 + do i = 1, elec_beta_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + noL_2e_v0(p,q,s,t) = noL_2e_v0(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + + call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) + call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) + call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) + + noL_2e_v0(p,q,s,t) = noL_2e_v0(p,q,s,t) + 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) + enddo ! i + + enddo ! p + enddo ! q + enddo ! s + enddo ! t + !$OMP END DO + !$OMP END PARALLEL + + endif + + call wall_time(t1) + print*, " Wall time for noL_2e_v0 (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] + + implicit none + integer :: p, q, s, t, i, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmp_O(:), tmp_J(:,:) + double precision, allocatable :: tmp_A(:,:,:), tmp_B(:,:,:) + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:) + double precision, allocatable :: tmp(:,:,:,:) + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + call wall_time(t0) + print*, " Providing noL_2e ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + allocate(tmp_A(n_points_final_grid,3,mo_num), tmp_B(n_points_final_grid,3,mo_num)) + allocate(tmp1(n_points_final_grid,4,mo_num,mo_num), tmp2(n_points_final_grid,4,mo_num,mo_num)) + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + tmp_O = 0.d0 + tmp_J = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O(ipoint) = tmp_O(ipoint) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J(ipoint,1) = tmp_J(ipoint,1) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J(ipoint,2) = tmp_J(ipoint,2) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J(ipoint,3) = tmp_J(ipoint,3) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(mo_num, elec_beta_num, n_points_final_grid, & + !$OMP final_weight_at_r_vector, mos_l_in_r_array_transp, & + !$OMP mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp_A, tmp_B) + + !$OMP DO + do p = 1, mo_num + + tmp_A(:,:,p) = 0.d0 + tmp_B(:,:,p) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_A(ipoint,1,p) = tmp_A(ipoint,1,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,p,i) + tmp_A(ipoint,2,p) = tmp_A(ipoint,2,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,p,i) + tmp_A(ipoint,3,p) = tmp_A(ipoint,3,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,p,i) + tmp_B(ipoint,1,p) = tmp_B(ipoint,1,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,i,p) + tmp_B(ipoint,2,p) = tmp_B(ipoint,2,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,i,p) + tmp_B(ipoint,3,p) = tmp_B(ipoint,3,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(mo_num, elec_beta_num, n_points_final_grid, & + !$OMP final_weight_at_r_vector, mos_l_in_r_array_transp, & + !$OMP mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp_A, tmp_B, tmp_O, tmp_J, tmp1, tmp2) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmp1(ipoint,1,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,1,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,1,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,1) + tmp1(ipoint,2,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,2,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,2,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,2) + tmp1(ipoint,3,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,3,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,3,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,3) + + tmp2(ipoint,1,p,s) = int2_grad1_u12_bimo_t(ipoint,1,p,s) + tmp2(ipoint,2,p,s) = int2_grad1_u12_bimo_t(ipoint,2,p,s) + tmp2(ipoint,3,p,s) = int2_grad1_u12_bimo_t(ipoint,3,p,s) + tmp2(ipoint,4,p,s) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) + + enddo ! ipoint + + tmp1(:,4,p,s) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_O, tmp_J, tmp_A, tmp_B) + + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 4*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1,1), 4*n_points_final_grid, tmp2(1,1,1,1), 4*n_points_final_grid & + , 1.d0, tmp(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1, tmp2) + + call sum_a_at(tmp, mo_num*mo_num) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(mo_num, tmp, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + noL_2e(p,q,s,t) = tmp(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp) + + else + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + allocate(tmp_A(n_points_final_grid,3,mo_num), tmp_B(n_points_final_grid,3,mo_num)) + allocate(tmp1(n_points_final_grid,4,mo_num,mo_num), tmp2(n_points_final_grid,4,mo_num,mo_num)) + allocate(tmp(mo_num,mo_num,mo_num,mo_num)) + + tmp_O = 0.d0 + tmp_J = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O(ipoint) = tmp_O(ipoint) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J(ipoint,1) = tmp_J(ipoint,1) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J(ipoint,2) = tmp_J(ipoint,2) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J(ipoint,3) = tmp_J(ipoint,3) + final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_O(ipoint) = tmp_O(ipoint) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J(ipoint,1) = tmp_J(ipoint,1) + 0.5d0 * final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J(ipoint,2) = tmp_J(ipoint,2) + 0.5d0 * final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J(ipoint,3) = tmp_J(ipoint,3) + 0.5d0 * final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(mo_num, elec_alpha_num, elec_beta_num, n_points_final_grid, & + !$OMP final_weight_at_r_vector, mos_l_in_r_array_transp, & + !$OMP mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp_A, tmp_B) + + !$OMP DO + do p = 1, mo_num + + tmp_A(:,:,p) = 0.d0 + tmp_B(:,:,p) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_A(ipoint,1,p) = tmp_A(ipoint,1,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,p,i) + tmp_A(ipoint,2,p) = tmp_A(ipoint,2,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,p,i) + tmp_A(ipoint,3,p) = tmp_A(ipoint,3,p) + final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,p,i) + tmp_B(ipoint,1,p) = tmp_B(ipoint,1,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,i,p) + tmp_B(ipoint,2,p) = tmp_B(ipoint,2,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,i,p) + tmp_B(ipoint,3,p) = tmp_B(ipoint,3,p) + final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,i,p) + enddo + enddo + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_A(ipoint,1,p) = tmp_A(ipoint,1,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,p,i) + tmp_A(ipoint,2,p) = tmp_A(ipoint,2,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,p,i) + tmp_A(ipoint,3,p) = tmp_A(ipoint,3,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,p,i) + tmp_B(ipoint,1,p) = tmp_B(ipoint,1,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,1,i,p) + tmp_B(ipoint,2,p) = tmp_B(ipoint,2,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,2,i,p) + tmp_B(ipoint,3,p) = tmp_B(ipoint,3,p) + 0.5d0 * final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,i) * int2_grad1_u12_bimo_t(ipoint,3,i,p) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(mo_num, elec_alpha_num, elec_beta_num, n_points_final_grid, & + !$OMP final_weight_at_r_vector, mos_l_in_r_array_transp, & + !$OMP mos_r_in_r_array_transp, int2_grad1_u12_bimo_t, & + !$OMP tmp_A, tmp_B, tmp_O, tmp_J, tmp1, tmp2) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmp1(ipoint,1,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,1,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,1,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,1) + tmp1(ipoint,2,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,2,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,2,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,2) + tmp1(ipoint,3,p,s) = mos_r_in_r_array_transp(ipoint,s) * tmp_A(ipoint,3,p) & + + mos_l_in_r_array_transp(ipoint,p) * tmp_B(ipoint,3,s) & + - tmp_O(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,p,s) & + - 2.d0 * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) * tmp_J(ipoint,3) + + tmp2(ipoint,1,p,s) = int2_grad1_u12_bimo_t(ipoint,1,p,s) + tmp2(ipoint,2,p,s) = int2_grad1_u12_bimo_t(ipoint,2,p,s) + tmp2(ipoint,3,p,s) = int2_grad1_u12_bimo_t(ipoint,3,p,s) + tmp2(ipoint,4,p,s) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p) * mos_r_in_r_array_transp(ipoint,s) + + enddo ! ipoint + + tmp1(:,4,p,s) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo ! ipoint + enddo ! i + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_O, tmp_J, tmp_A, tmp_B) + + + call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 4*n_points_final_grid, 0.5d0 & + , tmp1(1,1,1,1), 4*n_points_final_grid, tmp2(1,1,1,1), 4*n_points_final_grid & + , 1.d0, tmp(1,1,1,1), mo_num*mo_num) + + deallocate(tmp1, tmp2) + + call sum_a_at(tmp, mo_num*mo_num) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(t, s, q, p) & + !$OMP SHARED(mo_num, tmp, noL_2e) + + !$OMP DO COLLAPSE(3) + do t = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + noL_2e(p,q,s,t) = tmp(p,s,q,t) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp) + + endif + + call wall_time(t1) + print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + diff --git a/src/bi_ort_ints/no_dressing_v0.irp.f b/src/bi_ort_ints/no_dressing_v0.irp.f deleted file mode 100644 index 3b252f8e..00000000 --- a/src/bi_ort_ints/no_dressing_v0.irp.f +++ /dev/null @@ -1,324 +0,0 @@ - -! --- - -BEGIN_PROVIDER [double precision, noL_0e] - - implicit none - integer :: i, j, k - double precision :: I_ijk_ijk, I_ijk_kij, I_ijk_jik, I_ijk_jki, I_ijk_ikj, I_ijk_kji - double precision :: t0, t1 - double precision, allocatable :: tmp(:) - - call wall_time(t0) - print*, " Providing noL_0e ..." - - if(elec_alpha_num .eq. elec_beta_num) then - - allocate(tmp(elec_beta_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, & - !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik) & - !$OMP SHARED (elec_beta_num, tmp) - - !$OMP DO - do i = 1, elec_beta_num - - tmp(i) = 0.d0 - do j = 1, elec_beta_num - do k = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) - call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - - tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - noL_0e = -1.d0 * (sum(tmp)) / 6.d0 - - deallocate(tmp) - - else - - allocate(tmp(elec_alpha_num)) - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, & - !$OMP I_ijk_ijk, I_ijk_kij, I_ijk_jik, & - !$OMP I_ijk_jki, I_ijk_ikj, I_ijk_kji) & - !$OMP SHARED (elec_beta_num, elec_alpha_num, tmp) - - !$OMP DO - do i = 1, elec_beta_num - - tmp(i) = 0.d0 - do j = 1, elec_beta_num - do k = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) - call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - - tmp(i) = tmp(i) + 4.d0 * (2.d0 * I_ijk_ijk + I_ijk_kij - 3.d0 * I_ijk_jik) - enddo - enddo - enddo - !$OMP END DO - - !$OMP DO - do i = elec_beta_num+1, elec_alpha_num - - tmp(i) = 0.d0 - do j = elec_beta_num+1, elec_alpha_num - do k = elec_beta_num+1, elec_alpha_num - - call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) - call give_integrals_3_body_bi_ort(i, j, k, k, i, j, I_ijk_kij) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - - tmp(i) = tmp(i) + I_ijk_ijk + 2.d0 * I_ijk_kij - 3.d0 * I_ijk_jik - enddo ! k - enddo ! j - - do j = 1, elec_beta_num - do k = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) - call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) - call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - - tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) - enddo ! k - - do k = elec_beta_num+1, elec_alpha_num - - call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) - call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) - call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - - tmp(i) = tmp(i) + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) - enddo ! k - enddo ! j - enddo ! i - !$OMP END DO - !$OMP END PARALLEL - - noL_0e = -1.d0 * (sum(tmp)) / 6.d0 - - deallocate(tmp) - - endif - - call wall_time(t1) - print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 - - print*, " noL_0e = ", noL_0e - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] - - implicit none - integer :: p, s, i, j - double precision :: I_pij_sij, I_pij_isj, I_pij_ijs, I_pij_sji, I_pij_jsi, I_pij_jis - double precision :: t0, t1 - - call wall_time(t0) - print*, " Providing noL_1e ..." - - if(elec_alpha_num .eq. elec_beta_num) then - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p, s, i, j, & - !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & - !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, noL_1e) - - !$OMP DO COLLAPSE(2) - do s = 1, mo_num - do p = 1, mo_num - - noL_1e(p,s) = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) - call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) - call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - - noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - else - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p, s, i, j, & - !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & - !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) - - !$OMP DO COLLAPSE(2) - do s = 1, mo_num - do p = 1, mo_num - - noL_1e(p,s) = 0.d0 - do i = 1, elec_beta_num - do j = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) - call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) - call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - - noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) - enddo ! j - enddo ! i - - do i = elec_beta_num+1, elec_alpha_num - do j = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - call give_integrals_3_body_bi_ort(p, i, j, j, s, i, I_pij_jsi) - call give_integrals_3_body_bi_ort(p, i, j, j, i, s, I_pij_jis) - call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) - call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) - call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - - noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) - enddo ! j - - do j = elec_beta_num+1, elec_alpha_num - - call give_integrals_3_body_bi_ort(p, i, j, s, i, j, I_pij_sij) - call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) - call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - - noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) - enddo ! j - enddo ! i - - enddo ! p - enddo ! s - !$OMP END DO - !$OMP END PARALLEL - - endif - - call wall_time(t1) - print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] - - implicit none - integer :: p, q, s, t, i - double precision :: I_ipq_sit, I_ipq_tsi, I_ipq_ist - double precision :: t0, t1 - - call wall_time(t0) - print*, " Providing noL_2e ..." - - if(elec_alpha_num .eq. elec_beta_num) then - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p, q, s, t, i, & - !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, noL_2e) - - !$OMP DO COLLAPSE(4) - do t = 1, mo_num - do s = 1, mo_num - do q = 1, mo_num - do p = 1, mo_num - - noL_2e(p,q,s,t) = 0.d0 - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) - call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) - call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) - enddo - enddo - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL - - else - - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (p, q, s, t, i, & - !$OMP I_ipq_sit, I_ipq_tsi, I_ipq_ist) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_2e) - - !$OMP DO COLLAPSE(4) - do t = 1, mo_num - do s = 1, mo_num - do q = 1, mo_num - do p = 1, mo_num - - noL_2e(p,q,s,t) = 0.d0 - do i = 1, elec_beta_num - - call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) - call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) - call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.5d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) - enddo ! i - - do i = elec_beta_num+1, elec_alpha_num - - call give_integrals_3_body_bi_ort(i, p, q, s, i, t, I_ipq_sit) - call give_integrals_3_body_bi_ort(i, p, q, t, s, i, I_ipq_tsi) - call give_integrals_3_body_bi_ort(i, p, q, i, s, t, I_ipq_ist) - - noL_2e(p,q,s,t) = noL_2e(p,q,s,t) + 0.25d0 * (I_ipq_sit + I_ipq_tsi - 2.d0*I_ipq_ist) - enddo ! i - - enddo ! p - enddo ! q - enddo ! s - enddo ! t - !$OMP END DO - !$OMP END PARALLEL - - endif - - call wall_time(t1) - print*, " Wall time for noL_2e (min) = ", (t1 - t0)/60.d0 - -END_PROVIDER - -! --- - diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 56d8146f..c30b9f25 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -74,7 +74,9 @@ subroutine give_integrals_3_body_bi_ort_spin( n, sigma_n, l, sigma_l, k, sigma_k BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL SPIN-ORBITALS + ! < n l k | L | m j i > with a BI-ORTHONORMAL SPIN-ORBITALS + ! + ! /!\ L is defined without the 1/6 factor ! END_DOC @@ -128,6 +130,8 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) ! ! < n l k | L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS ! + ! /!\ L is defined without the 1/6 factor + ! END_DOC implicit none @@ -169,7 +173,9 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! < n l k | L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS + ! + ! /!\ L is defined without the 1/6 factor ! END_DOC @@ -182,35 +188,6 @@ subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral) integral = 0.d0 do ipoint = 1, n_points_final_grid weight = final_weight_at_r_vector(ipoint) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & -! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & -! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & -! * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i) & -! + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i) ) - -! integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & -! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint) & -! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint) & -! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & -! * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & -! + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & -! + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) -! integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & -! * ( int2_grad1_u12_bimo(1,l,j,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint) & -! + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint) & -! + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint) ) - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) & + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) & @@ -234,7 +211,9 @@ subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral) BEGIN_DOC ! - ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! < n l k | L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS + ! + ! /!\ L is defined without the 1/6 factor ! END_DOC diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f index 13eedfb7..73913426 100644 --- a/src/bi_ortho_mos/mos_rl.irp.f +++ b/src/bi_ortho_mos/mos_rl.irp.f @@ -32,7 +32,6 @@ subroutine ao_to_mo_bi_ortho(A_ao, LDA_ao, A_mo, LDA_mo) , mo_l_coef, size(mo_l_coef, 1), T, size(T, 1) & , 0.d0, A_mo, LDA_mo ) -! call restore_symmetry(mo_num,mo_num,A_mo,size(A_mo,1),1.d-12) deallocate(T) end subroutine ao_to_mo_bi_ortho diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index f515e31d..8ad7ed7f 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -37,8 +37,8 @@ program tc_bi_ortho !call test_no() !call test_no_v0() - call test_no_0() - call test_no_1() + !call test_no_0() + !call test_no_1() call test_no_2() end @@ -544,7 +544,7 @@ subroutine test_no_1() ref = noL_1e_naive(j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then - print*, ' problem on no_aaa_contraction' + print*, ' problem on noL_1e' print*, j, i print*, ref, new, contrib stop @@ -574,7 +574,7 @@ subroutine test_no_2() PROVIDE noL_2e_naive PROVIDE noL_2e - PROVIDE energy_2e_noL_HF + !PROVIDE energy_2e_noL_HF thr = 1d-8 @@ -589,7 +589,7 @@ subroutine test_no_2() ref = noL_2e_naive(l,k,j,i) contrib = dabs(new - ref) if(contrib .gt. thr) then - print*, ' problem on no_aaa_contraction' + print*, ' problem on noL_2e' print*, l, k, j, i print*, ref, new, contrib stop From dbaee4c85988c4e17f54a7e31897c57e67422323 Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Sun, 17 Sep 2023 16:41:20 +0200 Subject: [PATCH 295/337] DGEMM for noL_1e --- src/bi_ort_ints/no_dressing.irp.f | 613 ++++++++++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 2 +- src/tc_keywords/EZFIO.cfg | 2 +- 3 files changed, 604 insertions(+), 13 deletions(-) diff --git a/src/bi_ort_ints/no_dressing.irp.f b/src/bi_ort_ints/no_dressing.irp.f index cdebca07..046e0906 100644 --- a/src/bi_ort_ints/no_dressing.irp.f +++ b/src/bi_ort_ints/no_dressing.irp.f @@ -129,7 +129,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] +BEGIN_PROVIDER [double precision, noL_1e_v0, (mo_num, mo_num)] implicit none integer :: p, s, i, j @@ -137,7 +137,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] double precision :: t0, t1 call wall_time(t0) - print*, " Providing noL_1e ..." + print*, " Providing noL_1e_v0 ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -146,13 +146,13 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji) & - !$OMP SHARED (mo_num, elec_beta_num, noL_1e) + !$OMP SHARED (mo_num, elec_beta_num, noL_1e_v0) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - noL_1e(p,s) = 0.d0 + noL_1e_v0(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -161,7 +161,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e_v0(p,s) = noL_1e_v0(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo enddo enddo @@ -176,13 +176,13 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] !$OMP PRIVATE (p, s, i, j, & !$OMP I_pij_sij, I_pij_isj, I_pij_ijs, & !$OMP I_pij_sji, I_pij_jsi, I_pij_jis) & - !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e) + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, noL_1e_v0) !$OMP DO COLLAPSE(2) do s = 1, mo_num do p = 1, mo_num - noL_1e(p,s) = 0.d0 + noL_1e_v0(p,s) = 0.d0 do i = 1, elec_beta_num do j = 1, elec_beta_num @@ -191,7 +191,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e_v0(p,s) = noL_1e_v0(p,s) + (2.d0*I_pij_sij - 2.d0*I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -205,7 +205,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, s, j, I_pij_isj) call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) - noL_1e(p,s) = noL_1e(p,s) - 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) + noL_1e_v0(p,s) = noL_1e_v0(p,s) - 0.5d0 * (2.d0*I_pij_sji - I_pij_jsi + 2.d0*I_pij_jis - 4.d0*I_pij_sij + 2.d0*I_pij_isj - I_pij_ijs) enddo ! j do j = elec_beta_num+1, elec_alpha_num @@ -215,7 +215,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(p, i, j, i, j, s, I_pij_ijs) call give_integrals_3_body_bi_ort(p, i, j, s, j, i, I_pij_sji) - noL_1e(p,s) = noL_1e(p,s) + 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) + noL_1e_v0(p,s) = noL_1e_v0(p,s) + 0.5d0 * (I_pij_sij - I_pij_isj + I_pij_ijs - I_pij_sji) enddo ! j enddo ! i @@ -227,7 +227,7 @@ BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] endif call wall_time(t1) - print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_1e_v0 (min) = ", (t1 - t0)/60.d0 END_PROVIDER @@ -322,6 +322,597 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] + + implicit none + integer :: p, s, i, j, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:), tmp3(:,:,:), tmp4(:,:,:) + double precision, allocatable :: tmp_L(:,:,:), tmp_R(:,:,:), tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) + double precision, allocatable :: tmp_L0(:,:,:), tmp_R0(:,:,:) + double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) + + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp + + call wall_time(t0) + print*, " Providing noL_1e ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp2(n_points_final_grid,4)) + allocate(tmp1(n_points_final_grid,4,mo_num,mo_num)) + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,1) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,1) - tmp_M(ipoint,1)) + tmp2(ipoint,2) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,2) - tmp_M(ipoint,2)) + tmp2(ipoint,3) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,3) - tmp_M(ipoint,3)) + tmp2(ipoint,4) = -final_weight_at_r_vector(ipoint) * tmp_O(ipoint) + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1) * tmp_J(ipoint,1) + tmp_J(ipoint,2) * tmp_J(ipoint,2) + tmp_J(ipoint,3) * tmp_J(ipoint,3)) - tmp_S(ipoint) + enddo + + deallocate(tmp_O, tmp_M) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(mo_num, elec_beta_num, n_points_final_grid, & + !$OMP int2_grad1_u12_bimo_t, tmp1) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p,s) = int2_grad1_u12_bimo_t(ipoint,1,p,s) + tmp1(ipoint,2,p,s) = int2_grad1_u12_bimo_t(ipoint,2,p,s) + tmp1(ipoint,3,p,s) = int2_grad1_u12_bimo_t(ipoint,3,p,s) + enddo + + tmp1(:,4,p,s) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 2.d0 & + , tmp1(1,1,1,1), size(tmp1, 1) * size(tmp1, 2) & + , tmp2(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + deallocate(tmp1, tmp2) + + ! --- + + allocate(tmp_L(n_points_final_grid,3,mo_num)) + allocate(tmp_R(n_points_final_grid,3,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_L, tmp_R) + + !$OMP DO + do p = 1, mo_num + + tmp_L(:,1:3,p) = 0.d0 + tmp_R(:,1:3,p) = 0.d0 + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1,p) = tmp_L(ipoint,1,p) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2,p) = tmp_L(ipoint,2,p) + int2_grad1_u12_bimo_t(ipoint,2,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3,p) = tmp_L(ipoint,3,p) + int2_grad1_u12_bimo_t(ipoint,3,p,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1,p) = tmp_R(ipoint,1,p) + int2_grad1_u12_bimo_t(ipoint,1,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2,p) = tmp_R(ipoint,2,p) + int2_grad1_u12_bimo_t(ipoint,2,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3,p) = tmp_R(ipoint,3,p) + int2_grad1_u12_bimo_t(ipoint,3,i,p) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + allocate(tmp3(n_points_final_grid,5,mo_num)) + allocate(tmp4(n_points_final_grid,5,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_L, tmp_R, tmp_J, tmp_S, tmp3, tmp4) + + !$OMP DO + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,1,p) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p) + tmp3(ipoint,2,p) = -2.d0 * (tmp_L(ipoint,1,p) * tmp_J(ipoint,1) + tmp_L(ipoint,2,p) * tmp_J(ipoint,2) + tmp_L(ipoint,3,p) * tmp_J(ipoint,3)) + tmp3(ipoint,3,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,1,p) + tmp3(ipoint,4,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,2,p) + tmp3(ipoint,5,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,3,p) + + tmp4(ipoint,1,p) = -2.d0 * (tmp_R(ipoint,1,p) * tmp_J(ipoint,1) + tmp_R(ipoint,2,p) * tmp_J(ipoint,2) + tmp_R(ipoint,3,p) * tmp_J(ipoint,3)) & + + mos_r_in_r_array_transp(ipoint,p) * tmp_S(ipoint) + tmp4(ipoint,2,p) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,p) + tmp4(ipoint,3,p) = tmp_R(ipoint,1,p) + tmp4(ipoint,4,p) = tmp_R(ipoint,2,p) + tmp4(ipoint,5,p) = tmp_R(ipoint,3,p) + enddo + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,2,p) = tmp3(ipoint,2,p) + mos_l_in_r_array_transp(ipoint,j) * ( int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp4(ipoint,1,p) = tmp4(ipoint,1,p) + mos_r_in_r_array_transp(ipoint,i) * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,p) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,p) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_L, tmp_R, tmp_J, tmp_S) + + call dgemm( 'T', 'N', mo_num, mo_num, 5*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 5*n_points_final_grid, tmp4(1,1,1), 5*n_points_final_grid & + , 1.d0, noL_1e(1,1), mo_num) + + deallocate(tmp3, tmp4) + + ! --- + + else + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp2(n_points_final_grid,4)) + allocate(tmp1(n_points_final_grid,4,mo_num,mo_num)) + + do ipoint = 1, n_points_final_grid + + tmp2(ipoint,1) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,1) - tmp_M(ipoint,1)) + tmp2(ipoint,2) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,2) - tmp_M(ipoint,2)) + tmp2(ipoint,3) = final_weight_at_r_vector(ipoint) * (2.d0 * tmp_O(ipoint) * tmp_J(ipoint,3) - tmp_M(ipoint,3)) + tmp2(ipoint,4) = -final_weight_at_r_vector(ipoint) * tmp_O(ipoint) + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1) * tmp_J(ipoint,1) + tmp_J(ipoint,2) * tmp_J(ipoint,2) + tmp_J(ipoint,3) * tmp_J(ipoint,3)) - tmp_S(ipoint) + enddo + + deallocate(tmp_O, tmp_M) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, s, i, ipoint) & + !$OMP SHARED(mo_num, elec_beta_num, n_points_final_grid, & + !$OMP elec_alpha_num, int2_grad1_u12_bimo_t, tmp1) + + !$OMP DO COLLAPSE(2) + do s = 1, mo_num + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + tmp1(ipoint,1,p,s) = int2_grad1_u12_bimo_t(ipoint,1,p,s) + tmp1(ipoint,2,p,s) = int2_grad1_u12_bimo_t(ipoint,2,p,s) + tmp1(ipoint,3,p,s) = int2_grad1_u12_bimo_t(ipoint,3,p,s) + enddo + + tmp1(:,4,p,s) = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo + enddo + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp1(ipoint,4,p,s) = tmp1(ipoint,4,p,s) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,s) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,s) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,s) + enddo + enddo + + enddo ! p + enddo ! s + !$OMP END DO + !$OMP END PARALLEL + + call dgemv( 'T', 4*n_points_final_grid, mo_num*mo_num, 2.d0 & + , tmp1(1,1,1,1), size(tmp1, 1) * size(tmp1, 2) & + , tmp2(1,1), 1 & + , 0.d0, noL_1e(1,1), 1) + + deallocate(tmp1, tmp2) + + ! --- + + allocate(tmp_L(n_points_final_grid,3,mo_num), tmp_L0(n_points_final_grid,3,mo_num)) + allocate(tmp_R(n_points_final_grid,3,mo_num), tmp_R0(n_points_final_grid,3,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, ipoint) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_L0, tmp_R0, tmp_L, tmp_R) + + !$OMP DO + do p = 1, mo_num + + tmp_L0(:,1:3,p) = 0.d0 + tmp_R0(:,1:3,p) = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_L0(ipoint,1,p) = tmp_L0(ipoint,1,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L0(ipoint,2,p) = tmp_L0(ipoint,2,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L0(ipoint,3,p) = tmp_L0(ipoint,3,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,p,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R0(ipoint,1,p) = tmp_R0(ipoint,1,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R0(ipoint,2,p) = tmp_R0(ipoint,2,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R0(ipoint,3,p) = tmp_R0(ipoint,3,p) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,p) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp_L(:,1:3,p) = tmp_L0(:,1:3,p) + tmp_R(:,1:3,p) = tmp_R0(:,1:3,p) + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1,p) = tmp_L(ipoint,1,p) + int2_grad1_u12_bimo_t(ipoint,1,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2,p) = tmp_L(ipoint,2,p) + int2_grad1_u12_bimo_t(ipoint,2,p,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3,p) = tmp_L(ipoint,3,p) + int2_grad1_u12_bimo_t(ipoint,3,p,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1,p) = tmp_R(ipoint,1,p) + int2_grad1_u12_bimo_t(ipoint,1,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2,p) = tmp_R(ipoint,2,p) + int2_grad1_u12_bimo_t(ipoint,2,i,p) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3,p) = tmp_R(ipoint,3,p) + int2_grad1_u12_bimo_t(ipoint,3,i,p) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + allocate(tmp3(n_points_final_grid,8,mo_num)) + allocate(tmp4(n_points_final_grid,8,mo_num)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(p, i, j, ipoint) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, mo_num, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP tmp_L, tmp_L0, tmp_R, tmp_R0, tmp_J, tmp_S, tmp3, tmp4) + + !$OMP DO + do p = 1, mo_num + + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,1,p) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,p) + tmp3(ipoint,2,p) = -2.d0 * (tmp_L(ipoint,1,p) * tmp_J(ipoint,1) + tmp_L(ipoint,2,p) * tmp_J(ipoint,2) + tmp_L(ipoint,3,p) * tmp_J(ipoint,3)) + tmp3(ipoint,3,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,1,p) + tmp3(ipoint,4,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,2,p) + tmp3(ipoint,5,p) = final_weight_at_r_vector(ipoint) * tmp_L(ipoint,3,p) + tmp3(ipoint,6,p) = final_weight_at_r_vector(ipoint) * tmp_L0(ipoint,1,p) + tmp3(ipoint,7,p) = final_weight_at_r_vector(ipoint) * tmp_L0(ipoint,2,p) + tmp3(ipoint,8,p) = final_weight_at_r_vector(ipoint) * tmp_L0(ipoint,3,p) + + tmp4(ipoint,1,p) = -2.d0 * (tmp_R(ipoint,1,p) * tmp_J(ipoint,1) + tmp_R(ipoint,2,p) * tmp_J(ipoint,2) + tmp_R(ipoint,3,p) * tmp_J(ipoint,3)) & + + mos_r_in_r_array_transp(ipoint,p) * tmp_S(ipoint) + tmp4(ipoint,2,p) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,p) + tmp4(ipoint,3,p) = tmp_R(ipoint,1,p) + tmp4(ipoint,4,p) = tmp_R(ipoint,2,p) + tmp4(ipoint,5,p) = tmp_R(ipoint,3,p) + tmp4(ipoint,6,p) = tmp_R0(ipoint,1,p) + tmp4(ipoint,7,p) = tmp_R0(ipoint,2,p) + tmp4(ipoint,8,p) = tmp_R0(ipoint,3,p) + enddo + + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,2,p) = tmp3(ipoint,2,p) + mos_l_in_r_array_transp(ipoint,j) * ( int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp4(ipoint,1,p) = tmp4(ipoint,1,p) + mos_r_in_r_array_transp(ipoint,i) * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,p) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,p) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,2,p) = tmp3(ipoint,2,p) + 0.5d0 * mos_l_in_r_array_transp(ipoint,j) * ( int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + tmp3(ipoint,2,p) = tmp3(ipoint,2,p) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * ( int2_grad1_u12_bimo_t(ipoint,1,p,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,p,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,p,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) + + tmp4(ipoint,1,p) = tmp4(ipoint,1,p) + 0.5d0 * mos_r_in_r_array_transp(ipoint,i) * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,p) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,p) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,p) ) + tmp4(ipoint,1,p) = tmp4(ipoint,1,p) + 0.5d0 * mos_r_in_r_array_transp(ipoint,j) * ( int2_grad1_u12_bimo_t(ipoint,1,j,i) * int2_grad1_u12_bimo_t(ipoint,1,i,p) & + + int2_grad1_u12_bimo_t(ipoint,2,j,i) * int2_grad1_u12_bimo_t(ipoint,2,i,p) & + + int2_grad1_u12_bimo_t(ipoint,3,j,i) * int2_grad1_u12_bimo_t(ipoint,3,i,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp3(ipoint,2,p) = tmp3(ipoint,2,p) + 0.5d0 * mos_l_in_r_array_transp(ipoint,j) * ( int2_grad1_u12_bimo_t(ipoint,1,p,i) * int2_grad1_u12_bimo_t(ipoint,1,i,j) & + + int2_grad1_u12_bimo_t(ipoint,2,p,i) * int2_grad1_u12_bimo_t(ipoint,2,i,j) & + + int2_grad1_u12_bimo_t(ipoint,3,p,i) * int2_grad1_u12_bimo_t(ipoint,3,i,j) ) + + tmp4(ipoint,1,p) = tmp4(ipoint,1,p) + 0.5d0 * mos_r_in_r_array_transp(ipoint,i) * ( int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,p) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,p) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,p) ) + enddo ! ipoint + enddo ! j + enddo ! i + + enddo ! p + !$OMP END DO + !$OMP END PARALLEL + + deallocate(tmp_L0, tmp_L, tmp_R0, tmp_R, tmp_J, tmp_S) + + call dgemm( 'T', 'N', mo_num, mo_num, 8*n_points_final_grid, 1.d0 & + , tmp3(1,1,1), 8*n_points_final_grid, tmp4(1,1,1), 8*n_points_final_grid & + , 1.d0, noL_1e(1,1), mo_num) + + deallocate(tmp3, tmp4) + + endif + + call wall_time(t1) + print*, " Wall time for noL_1e (min) = ", (t1 - t0)/60.d0 + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] implicit none diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 8ad7ed7f..e827cf75 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -38,7 +38,7 @@ program tc_bi_ortho !call test_no_v0() !call test_no_0() - !call test_no_1() + call test_no_1() call test_no_2() end diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index fe396381..9b9aaca8 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -48,7 +48,7 @@ default: False [noL_standard] type: logical -doc: If |true|, standard normal-ordering for L +doc: If |true|, standard normal-ordering for L (to be used with three_body_h_tc |false|) interface: ezfio,provider,ocaml default: False From 4a335102a1d42e786fa3bf0bb82374df926dcbab Mon Sep 17 00:00:00 2001 From: AbdAmmar Date: Mon, 18 Sep 2023 12:03:29 +0200 Subject: [PATCH 296/337] OPTIM E_3e for TCSCF --- src/bi_ort_ints/no_dressing.irp.f | 419 ++++++++++++++++++++++++- src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 83 ++++- src/tc_scf/fock_three_hermit.irp.f | 414 +++++++++++++++++++++++- 3 files changed, 878 insertions(+), 38 deletions(-) diff --git a/src/bi_ort_ints/no_dressing.irp.f b/src/bi_ort_ints/no_dressing.irp.f index 046e0906..bd225274 100644 --- a/src/bi_ort_ints/no_dressing.irp.f +++ b/src/bi_ort_ints/no_dressing.irp.f @@ -1,7 +1,7 @@ ! --- -BEGIN_PROVIDER [double precision, noL_0e] +BEGIN_PROVIDER [double precision, noL_0e_v0] implicit none integer :: i, j, k @@ -10,7 +10,7 @@ BEGIN_PROVIDER [double precision, noL_0e] double precision, allocatable :: tmp(:) call wall_time(t0) - print*, " Providing noL_0e ..." + print*, " Providing noL_0e_v0 ..." if(elec_alpha_num .eq. elec_beta_num) then @@ -40,7 +40,7 @@ BEGIN_PROVIDER [double precision, noL_0e] !$OMP END DO !$OMP END PARALLEL - noL_0e = -1.d0 * (sum(tmp)) / 6.d0 + noL_0e_v0 = -1.d0 * (sum(tmp)) / 6.d0 deallocate(tmp) @@ -94,9 +94,8 @@ BEGIN_PROVIDER [double precision, noL_0e] call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) - call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_jik - I_ijk_kji) + tmp(i) = tmp(i) + 6.d0 * (2.d0 * I_ijk_ijk + I_ijk_jki - I_ijk_ikj - 2.d0 * I_ijk_jik) enddo ! k do k = elec_beta_num+1, elec_alpha_num @@ -104,26 +103,25 @@ BEGIN_PROVIDER [double precision, noL_0e] call give_integrals_3_body_bi_ort(i, j, k, i, j, k, I_ijk_ijk) call give_integrals_3_body_bi_ort(i, j, k, j, k, i, I_ijk_jki) call give_integrals_3_body_bi_ort(i, j, k, i, k, j, I_ijk_ikj) - call give_integrals_3_body_bi_ort(i, j, k, j, i, k, I_ijk_jik) call give_integrals_3_body_bi_ort(i, j, k, k, j, i, I_ijk_kji) - tmp(i) = tmp(i) + 3.d0 * (2.d0 * I_ijk_ijk + 2.d0 * I_ijk_jki - I_ijk_ikj - I_ijk_jik - 2.d0 * I_ijk_kji) + tmp(i) = tmp(i) + 6.d0 * (I_ijk_ijk + I_ijk_jki - I_ijk_ikj - I_ijk_kji) enddo ! k enddo ! j enddo ! i !$OMP END DO !$OMP END PARALLEL - noL_0e = -1.d0 * (sum(tmp)) / 6.d0 + noL_0e_v0 = -1.d0 * (sum(tmp)) / 6.d0 deallocate(tmp) endif call wall_time(t1) - print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 + print*, " Wall time for noL_0e_v0 (min) = ", (t1 - t0)/60.d0 - print*, " noL_0e = ", noL_0e + print*, " noL_0e_v0 = ", noL_0e_v0 END_PROVIDER @@ -322,6 +320,403 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [double precision, noL_0e] + + implicit none + integer :: i, j, k, ipoint + double precision :: t0, t1 + double precision, allocatable :: tmp(:) + double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) + double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) + double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) + + + call wall_time(t0) + print*, " Providing noL_0e ..." + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp(elec_beta_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + noL_0e = noL_0e -2.d0 * (sum(tmp)) + + deallocate(tmp) + + else + + allocate(tmp(elec_alpha_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = elec_beta_num+1, elec_alpha_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + noL_0e = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + noL_0e = noL_0e -2.d0 * (sum(tmp)) + + deallocate(tmp) + + endif + + call wall_time(t1) + print*, " Wall time for noL_0e (min) = ", (t1 - t0)/60.d0 + + print*, " noL_0e = ", noL_0e + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, noL_1e, (mo_num, mo_num)] implicit none @@ -1028,7 +1423,7 @@ BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 4*n_points_final_grid, 0.5d0 & , tmp1(1,1,1,1), 4*n_points_final_grid, tmp2(1,1,1,1), 4*n_points_final_grid & - , 1.d0, tmp(1,1,1,1), mo_num*mo_num) + , 0.d0, tmp(1,1,1,1), mo_num*mo_num) deallocate(tmp1, tmp2) @@ -1178,7 +1573,7 @@ BEGIN_PROVIDER [double precision, noL_2e, (mo_num, mo_num, mo_num, mo_num)] call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 4*n_points_final_grid, 0.5d0 & , tmp1(1,1,1,1), 4*n_points_final_grid, tmp2(1,1,1,1), 4*n_points_final_grid & - , 1.d0, tmp(1,1,1,1), mo_num*mo_num) + , 0.d0, tmp(1,1,1,1), mo_num*mo_num) deallocate(tmp1, tmp2) diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index e827cf75..369efd15 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -37,9 +37,10 @@ program tc_bi_ortho !call test_no() !call test_no_v0() - !call test_no_0() - call test_no_1() - call test_no_2() + call test_noL_0e() + call test_noL_1e() + !call test_noL_2e_v0() + call test_noL_2e() end @@ -319,7 +320,7 @@ subroutine test_no_v0() print*, ' accu (%) = ', 100.d0*accu/norm return -end subroutine test_no_0 +end ! --- @@ -365,7 +366,7 @@ subroutine test_no() print*, ' accu (%) = ', 100.d0*accu/norm return -end subroutine test_no +end ! --- @@ -502,19 +503,28 @@ end ! --- -subroutine test_no_0() +subroutine test_noL_0e() implicit none - double precision :: accu, norm + double precision :: accu, norm, thr - print*, ' testing no_0 ...' + thr = 1d-8 + + print*, ' testing noL_0e ...' PROVIDE noL_0e_naive + PROVIDE noL_0e_v0 PROVIDE noL_0e accu = dabs(noL_0e_naive - noL_0e) norm = dabs(noL_0e_naive) + if(accu .gt. thr) then + print*, ' problem on noL_0e' + print*, noL_0e_naive, noL_0e + stop + endif + print*, ' accu (%) = ', 100.d0*accu/norm return @@ -522,16 +532,17 @@ end ! --- -subroutine test_no_1() +subroutine test_noL_1e() implicit none integer :: i, j double precision :: accu, contrib, new, ref, thr, norm - print*, ' testing no_1 ...' + print*, ' testing noL_1e ...' PROVIDE noL_1e_naive PROVIDE noL_1e + PROVIDE energy_1e_noL_HF thr = 1d-8 @@ -557,24 +568,68 @@ subroutine test_no_1() print*, ' accu (%) = ', 100.d0*accu/norm - PROVIDE energy_1e_noL_HF + return +end + +! --- + +subroutine test_noL_2e_v0() + + implicit none + integer :: i, j, k, l + double precision :: accu, contrib, new, ref, thr, norm + + print*, ' testing noL_2e_v0 ...' + + PROVIDE noL_2e_naive + PROVIDE noL_2e_v0 + PROVIDE energy_2e_noL_HF + + thr = 1d-8 + + accu = 0.d0 + norm = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do k = 1, mo_num + do l = 1, mo_num + + new = noL_2e_v0 (l,k,j,i) + ref = noL_2e_naive(l,k,j,i) + contrib = dabs(new - ref) + if(contrib .gt. thr) then + print*, ' problem on noL_2e_v0' + print*, l, k, j, i + print*, ref, new, contrib + stop + endif + + accu += contrib + norm += dabs(ref) + enddo + enddo + enddo + enddo + + print*, ' accu (%) = ', 100.d0*accu/norm return end ! --- -subroutine test_no_2() + +subroutine test_noL_2e() implicit none integer :: i, j, k, l double precision :: accu, contrib, new, ref, thr, norm - print*, ' testing no_2 ...' + print*, ' testing noL_2e ...' PROVIDE noL_2e_naive PROVIDE noL_2e - !PROVIDE energy_2e_noL_HF + PROVIDE energy_2e_noL_HF thr = 1d-8 diff --git a/src/tc_scf/fock_three_hermit.irp.f b/src/tc_scf/fock_three_hermit.irp.f index a936da9b..6c132189 100644 --- a/src/tc_scf/fock_three_hermit.irp.f +++ b/src/tc_scf/fock_three_hermit.irp.f @@ -78,13 +78,16 @@ end ! --- -! TODO DGEMM BEGIN_PROVIDER [double precision, diag_three_elem_hf] implicit none - integer :: i, j, k, ipoint, mm - double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 - double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb + integer :: i, j, k, ipoint, mm + double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231 + double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb + double precision, allocatable :: tmp(:) + double precision, allocatable :: tmp_L(:,:), tmp_R(:,:) + double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:) + double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:) PROVIDE mo_l_coef mo_r_coef @@ -131,14 +134,397 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf] else - provide mo_l_coef mo_r_coef - call give_aaa_contrib(integral_aaa) - call give_aab_contrib(integral_aab) - call give_abb_contrib(integral_abb) - call give_bbb_contrib(integral_bbb) - diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb -! print*,'integral_aaa + integral_aab + integral_abb + integral_bbb' -! print*,integral_aaa , integral_aab , integral_abb , integral_bbb + ! ------------ + ! SLOW VERSION + ! ------------ + + !call give_aaa_contrib(integral_aaa) + !call give_aab_contrib(integral_aab) + !call give_abb_contrib(integral_abb) + !call give_bbb_contrib(integral_bbb) + !diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb + + ! ------------ + ! ------------ + + PROVIDE int2_grad1_u12_bimo_t + PROVIDE mos_l_in_r_array_transp + PROVIDE mos_r_in_r_array_transp + + if(elec_alpha_num .eq. elec_beta_num) then + + allocate(tmp(elec_beta_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + diag_three_elem_hf = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + diag_three_elem_hf = diag_three_elem_hf -2.d0 * (sum(tmp)) + + deallocate(tmp) + + else + + allocate(tmp(elec_alpha_num)) + allocate(tmp_L(n_points_final_grid,3), tmp_R(n_points_final_grid,3)) + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = 1, elec_beta_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + ! --- + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp, final_weight_at_r_vector) + + !$OMP DO + do j = elec_beta_num+1, elec_alpha_num + + tmp_L = 0.d0 + tmp_R = 0.d0 + do i = 1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) + tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) + + tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + + tmp(j) = 0.d0 + do ipoint = 1, n_points_final_grid + tmp(j) = tmp(j) + 0.5d0 * final_weight_at_r_vector(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3)) + enddo + enddo ! j + !$OMP END DO + !$OMP END PARALLEL + + diag_three_elem_hf = -2.d0 * sum(tmp) + + deallocate(tmp) + deallocate(tmp_L, tmp_R) + + ! --- + + allocate(tmp_O(n_points_final_grid), tmp_J(n_points_final_grid,3)) + tmp_O = 0.d0 + tmp_J = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_O, tmp_J) + + allocate(tmp_O_priv(n_points_final_grid), tmp_J_priv(n_points_final_grid,3)) + tmp_O_priv = 0.d0 + tmp_J_priv = 0.d0 + + !$OMP DO + do i = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO + do i = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i) + tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,i) + tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,i) + tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,i) + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_O = tmp_O + tmp_O_priv + tmp_J = tmp_J + tmp_J_priv + !$OMP END CRITICAL + + deallocate(tmp_O_priv, tmp_J_priv) + !$OMP END PARALLEL + + ! --- + + allocate(tmp_M(n_points_final_grid,3), tmp_S(n_points_final_grid)) + tmp_M = 0.d0 + tmp_S = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) & + !$OMP SHARED(elec_beta_num, elec_alpha_num, n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, tmp_M, tmp_S) + + allocate(tmp_M_priv(n_points_final_grid,3), tmp_S_priv(n_points_final_grid)) + tmp_M_priv = 0.d0 + tmp_S_priv = 0.d0 + + !$OMP DO COLLAPSE(2) + do i = 1, elec_beta_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = 1, elec_beta_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,i) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP DO COLLAPSE(2) + do i = elec_beta_num+1, elec_alpha_num + do j = elec_beta_num+1, elec_alpha_num + do ipoint = 1, n_points_final_grid + + tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,j,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,j) + + tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,1,i,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,2,i,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) & + + 0.5d0 * int2_grad1_u12_bimo_t(ipoint,3,i,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) + enddo + enddo + enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + tmp_M = tmp_M + tmp_M_priv + tmp_S = tmp_S + tmp_S_priv + !$OMP END CRITICAL + + deallocate(tmp_M_priv, tmp_S_priv) + !$OMP END PARALLEL + + allocate(tmp(n_points_final_grid)) + + do ipoint = 1, n_points_final_grid + + tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint) + + tmp(ipoint) = final_weight_at_r_vector(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) & + - 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) & + + tmp_J(ipoint,2) * tmp_M(ipoint,2) & + + tmp_J(ipoint,3) * tmp_M(ipoint,3))) + enddo + + diag_three_elem_hf = diag_three_elem_hf - 2.d0 * (sum(tmp)) + + deallocate(tmp) + + endif + endif @@ -374,3 +760,7 @@ BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3 enddo END_PROVIDER + + + + From 2b3bf4fc00fff56bbc2b0a5bd8623ef3efae0bd8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 18 Sep 2023 18:14:04 +0200 Subject: [PATCH 297/337] Installation of QMCkl --- configure | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 48e6fd12..47896abd 100755 --- a/configure +++ b/configure @@ -191,7 +191,7 @@ if [[ "${PACKAGES}.x" != ".x" ]] ; then fi if [[ ${PACKAGES} = all ]] ; then - PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio" + PACKAGES="zlib ninja zeromq f77zmq gmp ocaml docopt resultsFile bats trexio qmckl" fi @@ -229,6 +229,17 @@ EOF ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install EOF + elif [[ ${PACKAGE} = qmckl ]] ; then + + VERSION=0.5.2 + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz + tar -zxf qmckl-${VERSION}.tar.gz + cd qmckl-${VERSION} + ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc + make -j 4 && make -j 4 check && make install +EOF elif [[ ${PACKAGE} = gmp ]] ; then @@ -371,6 +382,12 @@ if [[ ${TREXIO} = $(not_found) ]] ; then fail fi +QMCKL=$(find_lib -lqmckl) +if [[ ${QMCKL} = $(not_found) ]] ; then + error "QMCkl (qmckl) is not installed." + fail +fi + F77ZMQ=$(find_lib -lzmq -lf77zmq -lpthread) if [[ ${F77ZMQ} = $(not_found) ]] ; then error "Fortran binding of ZeroMQ (f77zmq) is not installed." From 67906ed6257ecc7b8648f3ab48d30b97dd302198 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 19 Sep 2023 09:59:30 +0200 Subject: [PATCH 298/337] Fixing GitHub Actions --- .github/workflows/compilation.yml | 1 + .github/workflows/configuration.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 85daf7db..43e0b901 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -49,6 +49,7 @@ jobs: ./configure -i resultsFile || : ./configure -i bats || : ./configure -i trexio-nohdf5 || : + ./configure -i qmckl || : ./configure -c ./config/gfortran_debug.cfg - name: Compilation run: | diff --git a/.github/workflows/configuration.yml b/.github/workflows/configuration.yml index 178b394e..15b66f2b 100644 --- a/.github/workflows/configuration.yml +++ b/.github/workflows/configuration.yml @@ -56,6 +56,9 @@ jobs: - name: trexio run: | ./configure -i trexio || echo OK + - name: qmckl + run: | + ./configure -i qmckl || echo OK - name: Final check run: | ./configure -c config/gfortran_debug.cfg From 1f14e707b4dfb7df3547c1f95e51a04115a82104 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 19 Sep 2023 11:32:27 +0200 Subject: [PATCH 299/337] Fix fortran mod file --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 47896abd..b0422ace 100755 --- a/configure +++ b/configure @@ -238,7 +238,7 @@ EOF tar -zxf qmckl-${VERSION}.tar.gz cd qmckl-${VERSION} ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc - make -j 4 && make -j 4 check && make install + make && make -j 4 check && make install EOF From d251ddfa65055ef4821d63bdf3194879d2529857 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Tue, 19 Sep 2023 11:35:52 +0200 Subject: [PATCH 300/337] added LIN & NEED for qmckl --- src/qmckl/LIB | 1 + src/qmckl/NEED | 1 + 2 files changed, 2 insertions(+) create mode 100644 src/qmckl/LIB create mode 100644 src/qmckl/NEED diff --git a/src/qmckl/LIB b/src/qmckl/LIB new file mode 100644 index 00000000..a9fabb84 --- /dev/null +++ b/src/qmckl/LIB @@ -0,0 +1 @@ +-lqmckl diff --git a/src/qmckl/NEED b/src/qmckl/NEED new file mode 100644 index 00000000..d2066b18 --- /dev/null +++ b/src/qmckl/NEED @@ -0,0 +1 @@ +nuclei From c47a09f9c9922a2a050a7ff23d72c5165e6f3def Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 19 Sep 2023 13:34:41 +0200 Subject: [PATCH 301/337] Fix multiple qmckl or trexio installations --- configure | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure b/configure index b0422ace..f64e241f 100755 --- a/configure +++ b/configure @@ -211,7 +211,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz - tar -zxf trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 make -j 8 && make -j 8 check && make -j 8 install @@ -224,7 +224,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz - tar -zxf trexio-${VERSION}.tar.gz + tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} make -j 8 && make -j 8 check && make -j 8 install @@ -235,7 +235,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz - tar -zxf qmckl-${VERSION}.tar.gz + tar -zxf qmckl-${VERSION}.tar.gz && rm qmckl-${VERSION}.tar.gz cd qmckl-${VERSION} ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc make && make -j 4 check && make install From 1e12eba42b0649dc2f7c921516745c3a861bfbc3 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 19 Sep 2023 14:43:42 +0200 Subject: [PATCH 302/337] Changed how libraries are handled --- data/pseudo/def2 | 920 ++++++++++++++++++++++++++++ scripts/compilation/qp_create_ninja | 5 +- src/trexio/LIB | 1 + src/zmq/LIB | 1 + 4 files changed, 924 insertions(+), 3 deletions(-) create mode 100644 data/pseudo/def2 create mode 100644 src/trexio/LIB create mode 100644 src/zmq/LIB diff --git a/data/pseudo/def2 b/data/pseudo/def2 new file mode 100644 index 00000000..4278e77b --- /dev/null +++ b/data/pseudo/def2 @@ -0,0 +1,920 @@ +$ECP +RB-ECP GEN 28 3 +1 ----- f-ul potential ----- + -12.3169000 2 3.8431140 +3 ----- s-f potential ----- + 89.5001980 2 5.0365510 + 0.4937610 2 1.9708490 + 12.3169000 2 3.8431140 +3 ----- p-f potential ----- + 58.5689740 2 4.2583410 + 0.4317910 2 1.4707090 + 12.3169000 2 3.8431140 +3 ----- d-f potential ----- + 26.2248980 2 3.0231270 + 0.9628390 2 0.6503830 + 12.3169000 2 3.8431140 +SR-ECP GEN 28 3 +1 ----- f-ul potential ----- + -15.8059920 2 4.6339750 +3 ----- s-f potential ----- + 135.4794300 2 7.4000740 + 17.5344630 2 3.6063790 + 15.8059920 2 4.6339750 +3 ----- p-f potential ----- + 88.3597090 2 6.4848680 + 15.3943720 2 3.2880530 + 15.8059920 2 4.6339750 +3 ----- d-f potential ----- + 29.8889870 2 4.6228410 + 6.6594140 2 2.2469040 + 15.8059920 2 4.6339750 +Y-ECP GEN 28 3 +2 ----- f-ul potential ----- + -19.12219811 2 6.5842120 + -2.43637543 2 3.2921060 +4 ----- s-f potential ----- + 135.15384412 2 7.4880494 + 15.55244130 2 3.7440247 + 19.12219811 2 6.5842120 + 2.43637543 2 3.2921060 +4 ----- p-f potential ----- + 87.78499167 2 6.4453772 + 11.56406599 2 3.2226886 + 19.12219811 2 6.5842120 + 2.43637543 2 3.2921060 +4 ----- d-f potential ----- + 29.70100072 2 4.6584472 + 5.53996847 2 2.3292236 + 19.12219811 2 6.5842120 + 2.43637543 2 3.2921060 +ZR-ECP GEN 28 3 +2 ----- f-ul potential ----- + -21.09377605 2 7.5400000 + -3.08069427 2 3.7700000 +4 ----- s-f potential ----- + 150.26759106 2 8.2000000 + 18.97621650 2 4.0897278 + 21.09377605 2 7.5400000 + 3.08069427 2 3.7700000 +4 ----- p-f potential ----- + 99.62212372 2 7.1100000 + 14.16873329 2 3.5967980 + 21.09377605 2 7.5400000 + 3.08069427 2 3.7700000 +4 ----- d-f potential ----- + 35.04512355 2 5.3500000 + 6.11125948 2 2.4918215 + 21.09377605 2 7.5400000 + 3.08069427 2 3.7700000 +NB-ECP GEN 28 3 +2 ----- f-ul potential ----- + -22.92954996 2 8.4900000 + -3.66630986 2 4.2500000 +4 ----- s-f potential ----- + 165.17914349 2 8.9000000 + 21.99297437 2 4.4300000 + 22.92954996 2 8.4900000 + 3.66630986 2 4.2500000 +4 ----- p-f potential ----- + 111.79441445 2 7.7700000 + 16.63348326 2 3.9600000 + 22.92954996 2 8.4900000 + 3.66630986 2 4.2500000 +4 ----- d-f potential ----- + 38.11224880 2 6.0500000 + 8.03916727 2 2.8400000 + 22.92954996 2 8.4900000 + 3.66630986 2 4.2500000 +MO-ECP GEN 28 3 +2 ----- f-ul potential ----- + -24.80517707 2 9.4500000 + -4.15378155 2 4.7200000 +4 ----- s-f potential ----- + 180.10310850 2 9.7145938 + 24.99722791 2 4.6805004 + 24.80517707 2 9.4500000 + 4.15378155 2 4.7200000 +4 ----- p-f potential ----- + 123.77275231 2 8.1421366 + 19.53022800 2 4.6259863 + 24.80517707 2 9.4500000 + 4.15378155 2 4.7200000 +4 ----- d-f potential ----- + 48.37502229 2 6.6184148 + 8.89205274 2 3.2487516 + 24.80517707 2 9.4500000 + 4.15378155 2 4.7200000 +TC-ECP GEN 28 3 +2 ----- f-ul potential ----- + -26.56244747 2 10.4000000 + -4.58568054 2 5.2000000 +4 ----- s-f potential ----- + 195.15916591 2 10.4223462 + 28.09260333 2 5.0365160 + 26.56244747 2 10.4000000 + 4.58568054 2 5.2000000 +4 ----- p-f potential ----- + 135.28456622 2 8.9504494 + 21.80650430 2 4.8544394 + 26.56244747 2 10.4000000 + 4.58568054 2 5.2000000 +4 ----- d-f potential ----- + 54.32972942 2 6.9456968 + 11.15506795 2 3.9705849 + 26.56244747 2 10.4000000 + 4.58568054 2 5.2000000 +RU-ECP GEN 28 3 +2 ----- f-ul potential ----- + -28.34061627 2 11.3600000 + -4.94462923 2 5.6800000 +4 ----- s-f potential ----- + 209.82297122 2 11.1052693 + 30.65472642 2 5.4147454 + 28.34061627 2 11.3600000 + 4.94462923 2 5.6800000 +4 ----- p-f potential ----- + 146.33618228 2 9.7712707 + 24.12787723 2 5.0739908 + 28.34061627 2 11.3600000 + 4.94462923 2 5.6800000 +4 ----- d-f potential ----- + 67.51589667 2 7.6714231 + 9.87010415 2 4.1365647 + 28.34061627 2 11.3600000 + 4.94462923 2 5.6800000 +RH-ECP GEN 28 3 +2 ----- f-ul potential ----- + -30.09345572 2 12.3100000 + -5.21848192 2 6.1600000 +4 ----- s-f potential ----- + 225.34775353 2 11.7200000 + 32.82318898 2 5.8200000 + 30.09345572 2 12.3100000 + 5.21848192 2 6.1600000 +4 ----- p-f potential ----- + 158.70941159 2 10.4200000 + 26.44410049 2 5.4500000 + 30.09345572 2 12.3100000 + 5.21848192 2 6.1600000 +4 ----- d-f potential ----- + 62.75862572 2 8.8200000 + 10.97871947 2 3.8700000 + 30.09345572 2 12.3100000 + 5.21848192 2 6.1600000 +PD-ECP GEN 28 3 +2 ----- f-ul potential ----- + -31.92955431 2 13.2700000 + -5.39821694 2 6.6300000 +4 ----- s-f potential ----- + 240.22904033 2 12.4300000 + 35.17194347 2 6.1707594 + 31.92955431 2 13.2700000 + 5.39821694 2 6.6300000 +4 ----- p-f potential ----- + 170.41727605 2 11.0800000 + 28.47213287 2 5.8295541 + 31.92955431 2 13.2700000 + 5.39821694 2 6.6300000 +4 ----- d-f potential ----- + 69.01384488 2 9.5100000 + 11.75086158 2 4.1397811 + 31.92955431 2 13.2700000 + 5.39821694 2 6.6300000 +AG-ECP GEN 28 3 +2 ----- f-ul potential ----- + -33.68992012 2 14.2200000 + -5.53112021 2 7.1100000 +4 ----- s-f potential ----- + 255.13936452 2 13.1300000 + 36.86612154 2 6.5100000 + 33.68992012 2 14.2200000 + 5.53112021 2 7.1100000 +4 ----- p-f potential ----- + 182.18186871 2 11.7400000 + 30.35775148 2 6.2000000 + 33.68992012 2 14.2200000 + 5.53112021 2 7.1100000 +4 ----- d-f potential ----- + 73.71926087 2 10.2100000 + 12.50211712 2 4.3800000 + 33.68992012 2 14.2200000 + 5.53112021 2 7.1100000 +CD-ECP GEN 28 3 +2 ----- f-ul potential ----- + -35.47662555 2 15.1847957 + -5.61767685 2 7.5923978 +4 ----- s-f potential ----- + 270.00948324 2 13.8358689 + 38.76730798 2 6.8572704 + 35.47662555 2 15.1847957 + 5.61767685 2 7.5923978 +4 ----- p-f potential ----- + 193.82962939 2 12.4049710 + 31.89652523 2 6.5677995 + 35.47662555 2 15.1847957 + 5.61767685 2 7.5923978 +4 ----- d-f potential ----- + 79.19364700 2 10.8969253 + 13.23082674 2 4.6411649 + 35.47662555 2 15.1847957 + 5.61767685 2 7.5923978 +IN-ECP GEN 28 3 +2 ----- f-ul potential ----- + -13.72807800 2 12.53905600 + -18.20686600 2 12.55256100 +4 ----- s-f potential ----- + 281.12235000 2 15.39282200 + 61.90147000 2 8.05586400 + 13.72807800 2 12.53905600 + 18.20686600 2 12.55256100 +6 ----- p-f potential ----- + 67.46215400 2 13.92867200 + 134.94925000 2 13.34723400 + 14.74614000 2 7.61413200 + 29.63926200 2 7.31836500 + 13.72807800 2 12.53905600 + 18.20686600 2 12.55256100 +6 ----- d-f potential ----- + 35.49325400 2 14.03471500 + 53.17877300 2 14.51161600 + 9.17728100 2 5.55055000 + 12.39241000 2 5.05941500 + 13.72807800 2 12.53905600 + 18.20686600 2 12.55256100 +SN-ECP GEN 28 3 +2 ----- f-ul potential ----- + -12.57633300 2 12.28234800 + -16.59594400 2 12.27215000 +4 ----- s-f potential ----- + 279.98868200 2 17.42041400 + 62.37781000 2 7.63115500 + 12.57633300 2 12.28234800 + 16.59594400 2 12.27215000 +6 ----- p-f potential ----- + 66.16252300 2 16.13102400 + 132.17439600 2 15.62807700 + 16.33941700 2 7.32560800 + 32.48895900 2 6.94251900 + 12.57633300 2 12.28234800 + 16.59594400 2 12.27215000 +6 ----- d-f potential ----- + 36.38744100 2 15.51497600 + 54.50784100 2 15.18816000 + 8.69682300 2 5.45602400 + 12.84020800 2 5.36310500 + 12.57633300 2 12.28234800 + 16.59594400 2 12.27215000 +SB-ECP GEN 28 3 +2 ----- f-ul potential ----- + -15.36680100 2 14.44497800 + -20.29613800 2 14.44929500 +4 ----- s-f potential ----- + 281.07158100 2 16.33086500 + 61.71660400 2 8.55654200 + 15.36680100 2 14.44497800 + 20.29613800 2 14.44929500 +6 ----- p-f potential ----- + 67.45738000 2 14.47033700 + 134.93350300 2 13.81619400 + 14.71634400 2 8.42492400 + 29.51851200 2 8.09272800 + 15.36680100 2 14.44497800 + 20.29613800 2 14.44929500 +6 ----- d-f potential ----- + 35.44781500 2 14.88633100 + 53.14346600 2 15.14631900 + 9.17922300 2 5.90826700 + 13.24025300 2 5.59432200 + 15.36680100 2 14.44497800 + 20.29613800 2 14.44929500 +TE-ECP GEN 28 3 +2 ----- f-ul potential ----- + -15.74545000 2 15.20616800 + -20.74244800 2 15.20170200 +4 ----- s-f potential ----- + 281.04584300 2 16.81447300 + 61.62065600 2 8.79352600 + 15.74545000 2 15.20616800 + 20.74244800 2 15.20170200 +6 ----- p-f potential ----- + 67.44946400 2 14.87780100 + 134.90430400 2 14.26973100 + 14.68954700 2 8.72443500 + 29.41506300 2 8.29151500 + 15.74545000 2 15.20616800 + 20.74244800 2 15.20170200 +6 ----- d-f potential ----- + 35.43205700 2 15.20500800 + 53.13568700 2 15.22584800 + 9.06980200 2 6.07176900 + 13.12230400 2 5.80476000 + 15.74545000 2 15.20616800 + 20.74244800 2 15.20170200 +I-ECP GEN 28 3 +4 ----- f-ul potential ----- + -21.84204000 2 19.45860900 + -28.46819100 2 19.34926000 + -0.24371300 2 4.82376700 + -0.32080400 2 4.88431500 +7 ----- s-f potential ----- + 49.99429300 2 40.01583500 + 281.02531700 2 17.42974700 + 61.57332600 2 9.00548400 + 21.84204000 2 19.45860900 + 28.46819100 2 19.34926000 + 0.24371300 2 4.82376700 + 0.32080400 2 4.88431500 +8 ----- p-f potential ----- + 67.44284100 2 15.35546600 + 134.88113700 2 14.97183300 + 14.67505100 2 8.96016400 + 29.37566600 2 8.25909600 + 21.84204000 2 19.45860900 + 28.46819100 2 19.34926000 + 0.24371300 2 4.82376700 + 0.32080400 2 4.88431500 +10 ----- d-f potential ----- + 35.43952900 2 15.06890800 + 53.17605700 2 14.55532200 + 9.06719500 2 6.71864700 + 13.20693700 2 6.45639300 + 0.08933500 2 1.19177900 + 0.05238000 2 1.29115700 + 21.84204000 2 19.45860900 + 28.46819100 2 19.34926000 + 0.24371300 2 4.82376700 + 0.32080400 2 4.88431500 +XE-ECP GEN 28 3 +4 ----- f-ul potential ----- + -23.08929500 2 20.88155700 + -30.07447500 2 20.78344300 + -0.28822700 2 5.25338900 + -0.38692400 2 5.36118800 +7 ----- s-f potential ----- + 49.99796200 2 40.00518400 + 281.01330300 2 17.81221400 + 61.53825500 2 9.30415000 + 23.08929500 2 20.88155700 + 30.07447500 2 20.78344300 + 0.28822700 2 5.25338900 + 0.38692400 2 5.36118800 +8 ----- p-f potential ----- + 67.43914200 2 15.70177200 + 134.87471100 2 15.25860800 + 14.66330000 2 9.29218400 + 29.35473000 2 8.55900300 + 23.08929500 2 20.88155700 + 30.07447500 2 20.78344300 + 0.28822700 2 5.25338900 + 0.38692400 2 5.36118800 +10 ----- d-f potential ----- + 35.43690800 2 15.18560000 + 53.19577200 2 14.28450000 + 9.04623200 2 7.12188900 + 13.22368100 2 6.99196300 + 0.08485300 2 0.62394600 + 0.04415500 2 0.64728400 + 23.08929500 2 20.88155700 + 30.07447500 2 20.78344300 + 0.28822700 2 5.25338900 + 0.38692400 2 5.36118800 +CS-ECP GEN 46 3 +1 ----- f-ul potential ----- + -28.8843090 2 3.1232690 +3 ----- s-f potential ----- + 84.5477300 2 4.0797500 + 16.6541730 2 2.4174060 + 28.8843090 2 3.1232690 +3 ----- p-f potential ----- + 157.0490590 2 5.5140800 + 26.4233070 2 2.1603160 + 28.8843090 2 3.1232690 +3 ----- d-f potential ----- + 13.1727530 2 1.8074100 + 3.3428330 2 0.8581820 + 28.8843090 2 3.1232690 +BA-ECP GEN 46 3 +1 ----- f-ul potential ----- + -33.4731740 2 3.5894650 +3 ----- s-f potential ----- + 427.8458160 2 9.5269860 + 204.4175300 2 4.4875100 + 33.4731740 2 3.5894650 +3 ----- p-f potential ----- + 293.6058640 2 8.3159300 + 294.1933160 2 4.2922170 + 33.4731740 2 3.5894650 +3 ----- d-f potential ----- + 112.5504020 2 5.9161080 + 181.7826210 2 2.8748420 + 33.4731740 2 3.5894650 +LA-ECP GEN 46 3 +1 ----- f-ul potential ----- + -36.0100160 2 4.0286000 +3 ----- s-f potential ----- + 91.9321770 2 3.3099000 + -3.7887640 2 1.6550000 + 36.0100160 2 4.0286000 +3 ----- p-f potential ----- + 63.7594860 2 2.8368000 + -0.6479580 2 1.4184000 + 36.0100160 2 4.0286000 +3 ----- d-f potential ----- + 36.1161730 2 2.0213000 + 0.2191140 2 1.0107000 + 36.0100160 2 4.0286000 +CE-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 580.08345700 2 20.13782900 +1 ----- p-h potential ----- + 310.30283300 2 15.99848200 +1 ----- d-h potential ----- + 167.81394400 2 14.97418700 +1 ----- f-h potential ----- + -49.39022900 2 23.40245500 +1 ----- g-h potential ----- + -21.33187900 2 16.57055300 +PR-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 577.57312200 2 20.76627800 +1 ----- p-h potential ----- + 295.78584600 2 16.07844800 +1 ----- d-h potential ----- + 150.86705500 2 14.70508900 +1 ----- f-h potential ----- + -48.73676600 2 23.37896900 +1 ----- g-h potential ----- + -22.32948800 2 17.44713800 +ND-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 574.37098000 2 21.35226700 +1 ----- p-h potential ----- + 280.94644000 2 16.11926500 +1 ----- d-h potential ----- + 138.67062700 2 14.49410300 +1 ----- f-h potential ----- + -47.52266800 2 23.18386000 +1 ----- g-h potential ----- + -23.34458700 2 18.34417400 +PM-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 575.39574900 2 21.94286500 +1 ----- p-h potential ----- + 281.70451400 2 16.55516100 +1 ----- d-h potential ----- + 123.52473700 2 13.96030800 +1 ----- f-h potential ----- + -50.74151100 2 24.03354600 +1 ----- g-h potential ----- + -24.37251000 2 19.26024500 +SM-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 572.98533200 2 22.34447100 +1 ----- p-h potential ----- + 272.35914500 2 16.69459000 +1 ----- d-h potential ----- + 115.29390000 2 13.72770500 +1 ----- f-h potential ----- + -51.10839200 2 24.05909200 +1 ----- g-h potential ----- + -25.42188500 2 20.19724900 +EU-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 607.65933100 2 23.47138400 +1 ----- p-h potential ----- + 264.38547600 2 16.77247900 +1 ----- d-h potential ----- + 115.38137500 2 13.98134300 +1 ----- f-h potential ----- + -49.40079400 2 23.96288800 +1 ----- g-h potential ----- + -26.74827300 2 21.23245800 +GD-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 637.20086900 2 24.60215100 +1 ----- p-h potential ----- + 261.68960100 2 16.88925000 +1 ----- d-h potential ----- + 106.85653300 2 13.64335800 +1 ----- f-h potential ----- + -50.68359000 2 24.12691700 +1 ----- g-h potential ----- + -27.57963000 2 22.13188700 +TB-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 668.59715500 2 24.95295600 +1 ----- p-h potential ----- + 266.98047500 2 17.61089900 +1 ----- d-h potential ----- + 97.50659600 2 12.97600900 +1 ----- f-h potential ----- + -52.17575700 2 24.24886900 +1 ----- g-h potential ----- + -28.69426800 2 23.13067200 +DY-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 705.67122100 2 26.42958600 +1 ----- p-h potential ----- + 254.86698900 2 17.31703400 +1 ----- d-h potential ----- + 95.04518700 2 12.91359900 +1 ----- f-h potential ----- + -54.57409300 2 24.90787800 +1 ----- g-h potential ----- + -29.82827700 2 24.14875300 +HO-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 755.70313600 2 28.39725700 +1 ----- p-h potential ----- + 253.55199800 2 17.43863300 +1 ----- d-h potential ----- + 89.63567700 2 12.43421200 +1 ----- f-h potential ----- + -55.48203600 2 25.38701000 +1 ----- g-h potential ----- + -30.99112500 2 25.18850100 +ER-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 800.95287600 2 29.79859200 +1 ----- p-h potential ----- + 262.01986900 2 18.11423700 +1 ----- d-h potential ----- + 80.17055200 2 11.36958700 +1 ----- f-h potential ----- + -42.33628500 2 21.82123300 +1 ----- g-h potential ----- + -32.18527800 2 26.25073500 +TM-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 845.51074300 2 31.14412200 +1 ----- p-h potential ----- + 258.58523900 2 18.09235300 +1 ----- d-h potential ----- + 80.72905900 2 11.46915900 +1 ----- f-h potential ----- + -48.70126600 2 23.60554400 +1 ----- g-h potential ----- + -33.39549600 2 27.32978100 +YB-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 891.01377700 2 32.42448400 +1 ----- p-h potential ----- + 264.03695300 2 18.65623200 +1 ----- d-h potential ----- + 73.92391900 2 10.49022200 +1 ----- f-h potential ----- + -39.59217300 2 20.77418300 +1 ----- g-h potential ----- + -34.63863800 2 28.43102800 +LU-ECP GEN 28 5 +1 ----- h-ul potential ----- + 0.00000000 2 1.00000000 +1 ----- s-h potential ----- + 989.99558400 2 35.16209700 +1 ----- p-h potential ----- + 278.86565200 2 19.46440200 +1 ----- d-h potential ----- + 71.00917800 2 10.00686500 +1 ----- f-h potential ----- + -47.40589000 2 23.51793200 +1 ----- g-h potential ----- + -35.55714600 2 29.41223800 +HF-ECP GEN 60 3 +1 ----- f-ul potential ----- + 10.04672251 2 1.78576984 +3 ----- s-f potential ----- + 1499.28471073 2 14.76995900 + 40.28210136 2 7.38497940 + -10.04672251 2 1.78576984 +3 ----- p-f potential ----- + 397.73300533 2 9.84948950 + 19.31640586 2 4.92474450 + -10.04672251 2 1.78576984 +3 ----- d-f potential ----- + 101.32980526 2 6.09675640 + 5.87343821 2 3.04837820 + -10.04672251 2 1.78576984 +TA-ECP GEN 60 3 +1 ----- f-ul potential ----- + 12.01796094 2 2.01788111 +3 ----- s-f potential ----- + 1345.88064703 2 14.54640770 + 36.76680620 2 7.27320380 + -12.01796094 2 2.01788111 +3 ----- p-f potential ----- + 378.42530145 2 9.93556529 + 22.29309086 2 4.96778243 + -12.01796094 2 2.01788111 +3 ----- d-f potential ----- + 104.88395571 2 6.34737691 + 8.75584805 2 3.17368846 + -12.01796094 2 2.01788111 +W-ECP GEN 60 3 +1 ----- f-ul potential ----- + 14.15257947 2 2.25888846 +3 ----- s-f potential ----- + 1192.39588226 2 14.32285640 + 32.52293315 2 7.16142810 + -14.15257947 2 2.25888846 +3 ----- p-f potential ----- + 359.03196711 2 10.02164110 + 24.03038019 2 5.01082040 + -14.15257947 2 2.25888846 +3 ----- d-f potential ----- + 108.30134897 2 6.59799743 + 10.98252827 2 3.29899871 + -14.15257947 2 2.25888846 +RE-ECP GEN 60 3 +1 ----- f-ul potential ----- + 16.44985227 2 2.50865059 +3 ----- s-f potential ----- + 1038.95157226 2 14.09930510 + 29.56173830 2 7.04965250 + -16.44985227 2 2.50865059 +3 ----- p-f potential ----- + 339.54350965 2 10.10771690 + 24.91369646 2 5.05385830 + -16.44985227 2 2.50865059 +3 ----- d-f potential ----- + 111.69965275 2 6.84861794 + 12.62432927 2 3.42430897 + -16.44985227 2 2.50865059 +OS-ECP GEN 60 3 +1 ----- f-ul potential ----- + 18.90945701 2 2.76707510 +3 ----- s-f potential ----- + 885.40571914 2 13.87575390 + 25.96704014 2 6.93787690 + -18.90945701 2 2.76707510 +3 ----- p-f potential ----- + 320.08390185 2 10.19379260 + 26.14876493 2 5.09689620 + -18.90945701 2 2.76707510 +3 ----- d-f potential ----- + 115.04484313 2 7.09923846 + 13.62257457 2 3.54961923 + -18.90945701 2 2.76707510 +IR-ECP GEN 60 3 +1 ----- f-ul potential ----- + 21.53103107 2 3.03407192 +3 ----- s-f potential ----- + 732.26919978 2 13.65220260 + 26.48472087 2 6.82610130 + -21.53103107 2 3.03407192 +3 ----- p-f potential ----- + 299.48947357 2 10.27986840 + 26.46623354 2 5.13993410 + -21.53103107 2 3.03407192 +3 ----- d-f potential ----- + 124.45759451 2 7.34985897 + 14.03599518 2 3.67492949 + -21.53103107 2 3.03407192 +PT-ECP GEN 60 3 +1 ----- f-ul potential ----- + 24.31437573 2 3.30956857 +3 ----- s-f potential ----- + 579.22386092 2 13.42865130 + 29.66949062 2 6.71432560 + -24.31437573 2 3.30956857 +3 ----- p-f potential ----- + 280.86077422 2 10.36594420 + 26.74538204 2 5.18297210 + -24.31437573 2 3.30956857 +3 ----- d-f potential ----- + 120.39644429 2 7.60047949 + 15.81092058 2 3.80023974 + -24.31437573 2 3.30956857 +AU-ECP GEN 60 3 +2 ----- f-ul potential ----- + 30.49008890 2 4.78982000 + 5.17107381 2 2.39491000 +4 ----- s-f potential ----- + 426.84667920 2 13.20510000 + 37.00708285 2 6.60255000 + -30.49008890 2 4.78982000 + -5.17107381 2 2.39491000 +4 ----- p-f potential ----- + 261.19958038 2 10.45202000 + 26.96249604 2 5.22601000 + -30.49008890 2 4.78982000 + -5.17107381 2 2.39491000 +4 ----- d-f potential ----- + 124.79066561 2 7.85110000 + 16.30072573 2 3.92555000 + -30.49008890 2 4.78982000 + -5.17107381 2 2.39491000 +HG-ECP GEN 60 3 +1 ----- f-ul potential ----- + 30.36499643 2 3.88579112 +3 ----- s-f potential ----- + 275.73721174 2 12.98154870 + 49.08921249 2 6.49077440 + -30.36499643 2 3.88579112 +3 ----- p-f potential ----- + 241.54007398 2 10.53809580 + 27.39659081 2 5.26904790 + -30.36499643 2 3.88579112 +3 ----- d-f potential ----- + 127.86700761 2 8.10172051 + 16.60831151 2 4.05086026 + -30.36499643 2 3.88579112 +TL-ECP GEN 60 3 +4 ----- f-ul potential ----- + 15.82548800 2 5.62639900 + 21.10402100 2 5.54895200 + 2.91512700 2 2.87494600 + 3.89690300 2 2.82145100 +6 ----- s-f potential ----- + 281.28466300 2 12.16780500 + 62.43425100 2 8.29490900 + -15.82548800 2 5.62639900 + -21.10402100 2 5.54895200 + -2.91512700 2 2.87494600 + -3.89690300 2 2.82145100 +8 ----- p-f potential ----- + 4.63340800 2 7.15149200 + 9.34175600 2 5.17286500 + 72.29925300 2 9.89107200 + 144.55803700 2 9.00339100 + -15.82548800 2 5.62639900 + -21.10402100 2 5.54895200 + -2.91512700 2 2.87494600 + -3.89690300 2 2.82145100 +8 ----- d-f potential ----- + 35.94303900 2 7.13021800 + 53.90959300 2 6.92690600 + 10.38193900 2 5.41757000 + 15.58382200 2 5.13868100 + -15.82548800 2 5.62639900 + -21.10402100 2 5.54895200 + -2.91512700 2 2.87494600 + -3.89690300 2 2.82145100 +PB-ECP GEN 60 3 +2 ----- f-ul potential ----- + 12.20989200 2 3.88751200 + 16.19029100 2 3.81196300 +4 ----- s-f potential ----- + 281.28549900 2 12.29630300 + 62.52021700 2 8.63263400 + -12.20989200 2 3.88751200 + -16.19029100 2 3.81196300 +6 ----- p-f potential ----- + 72.27689700 2 10.24179000 + 144.59108300 2 8.92417600 + 4.75869300 2 6.58134200 + 9.94062100 2 6.25540300 + -12.20989200 2 3.88751200 + -16.19029100 2 3.81196300 +6 ----- d-f potential ----- + 35.84850700 2 7.75433600 + 53.72434200 2 7.72028100 + 10.11525600 2 4.97026400 + 14.83373100 2 4.56378900 + -12.20989200 2 3.88751200 + -16.19029100 2 3.81196300 +BI-ECP GEN 60 3 +2 ----- f-ul potential ----- + 13.71338300 2 4.21454600 + 18.19430800 2 4.13340000 +4 ----- s-f potential ----- + 283.26422700 2 13.04309000 + 62.47195900 2 8.22168200 + -13.71338300 2 4.21454600 + -18.19430800 2 4.13340000 +6 ----- p-f potential ----- + 72.00149900 2 10.46777700 + 144.00227700 2 9.11890100 + 5.00794500 2 6.75479100 + 9.99155000 2 6.25259200 + -13.71338300 2 4.21454600 + -18.19430800 2 4.13340000 +6 ----- d-f potential ----- + 36.39625900 2 8.08147400 + 54.59766400 2 7.89059500 + 9.98429400 2 4.95555600 + 14.98148500 2 4.70455900 + -13.71338300 2 4.21454600 + -18.19430800 2 4.13340000 +PO-ECP GEN 60 3 +4 ----- f-ul potential ----- + 17.42829500 2 5.01327000 + 23.38035300 2 4.98464000 + 0.16339200 2 1.32676000 + 0.32456600 2 1.52875800 +6 ----- s-f potential ----- + 283.24470600 2 13.27722700 + 62.39646100 2 8.39951800 + -17.42829500 2 5.01327000 + -23.38035300 2 4.98464000 + -0.16339200 2 1.32676000 + -0.32456600 2 1.52875800 +8 ----- p-f potential ----- + 71.99171600 2 10.66568200 + 143.97187100 2 9.28375300 + 4.94961500 2 6.87274900 + 9.74049900 2 6.32615000 + -17.42829500 2 5.01327000 + -23.38035300 2 4.98464000 + -0.16339200 2 1.32676000 + -0.32456600 2 1.52875800 +8 ----- d-f potential ----- + 36.37838300 2 8.21486600 + 54.56271500 2 8.00869600 + 9.88949900 2 5.05522700 + 14.69387700 2 4.78255300 + -17.42829500 2 5.01327000 + -23.38035300 2 4.98464000 + -0.16339200 2 1.32676000 + -0.32456600 2 1.52875800 +AT-ECP GEN 60 3 +4 ----- f-ul potential ----- + 19.87019800 2 5.81216300 + 26.41645200 2 5.75371500 + 0.99497000 2 2.51347200 + 1.49070100 2 2.53626100 +7 ----- s-f potential ----- + 49.95715800 2 30.20083200 + 283.21037100 2 13.61230600 + 62.28105200 2 8.52934000 + -19.87019800 2 5.81216300 + -26.41645200 2 5.75371500 + -0.99497000 2 2.51347200 + -1.49070100 2 2.53626100 +8 ----- p-f potential ----- + 71.98237100 2 10.85406500 + 143.90353200 2 9.46822900 + 4.87175900 2 7.03111400 + 8.98305900 2 6.14385800 + -19.87019800 2 5.81216300 + -26.41645200 2 5.75371500 + -0.99497000 2 2.51347200 + -1.49070100 2 2.53626100 +8 ----- d-f potential ----- + 36.36323700 2 8.31351500 + 54.54897000 2 7.99896500 + 9.77628500 2 5.17996600 + 14.26475500 2 4.94222600 + -19.87019800 2 5.81216300 + -26.41645200 2 5.75371500 + -0.99497000 2 2.51347200 + -1.49070100 2 2.53626100 +RN-ECP GEN 60 3 +4 ----- f-ul potential ----- + 21.79729000 2 6.34857100 + 28.94680500 2 6.29594900 + 1.44736500 2 2.88211800 + 2.17796400 2 2.90804800 +7 ----- s-f potential ----- + 49.96555100 2 30.15124200 + 283.07000000 2 14.52124100 + 62.00287000 2 8.05203800 + -21.79729000 2 6.34857100 + -28.94680500 2 6.29594900 + -1.44736500 2 2.88211800 + -2.17796400 2 2.90804800 +8 ----- p-f potential ----- + 71.96911900 2 11.00994200 + 143.86055900 2 9.61762500 + 4.71476100 2 7.33600800 + 9.01306500 2 6.40625300 + -21.79729000 2 6.34857100 + -28.94680500 2 6.29594900 + -1.44736500 2 2.88211800 + -2.17796400 2 2.90804800 +8 ----- d-f potential ----- + 36.36836500 2 8.36922000 + 54.55176100 2 8.11697500 + 9.63448700 2 5.35365600 + 14.38790200 2 5.09721200 + -21.79729000 2 6.34857100 + -28.94680500 2 6.29594900 + -1.44736500 2 2.88211800 + -2.17796400 2 2.90804800 +$END diff --git a/scripts/compilation/qp_create_ninja b/scripts/compilation/qp_create_ninja index 606fd0f6..e67d896b 100755 --- a/scripts/compilation/qp_create_ninja +++ b/scripts/compilation/qp_create_ninja @@ -38,9 +38,8 @@ def comp_path(path): from qp_path import QP_ROOT, QP_SRC, QP_EZFIO -LIB = " -lz -ltrexio" +LIB = " -lz" EZFIO_LIB = join("$QP_ROOT", "lib", "libezfio_irp.a") -ZMQ_LIB = join("$QP_ROOT", "lib", "libf77zmq.a") + " " + join("$QP_ROOT", "lib", "libzmq.a") + " -lstdc++ -lrt -ldl" ROOT_BUILD_NINJA = join("$QP_ROOT", "config", "build.ninja") ROOT_BUILD_NINJA_EXP = join(QP_ROOT, "config", "build.ninja") ROOT_BUILD_NINJA_EXP_tmp = join(QP_ROOT, "config", "build.ninja.tmp") @@ -118,7 +117,7 @@ def ninja_create_env_variable(pwd_config_file): lib_lapack = get_compilation_option(pwd_config_file, "LAPACK_LIB") lib_usr = get_compilation_option(pwd_config_file, "LIB") - str_lib = " ".join([lib_lapack, EZFIO_LIB, ZMQ_LIB, LIB, lib_usr]) + str_lib = " ".join([lib_lapack, EZFIO_LIB, LIB, lib_usr]) # Read all LIB files in modules for directory in [real_join(QP_SRC, m) for m in sorted(os.listdir(QP_SRC))]: diff --git a/src/trexio/LIB b/src/trexio/LIB new file mode 100644 index 00000000..ccff168e --- /dev/null +++ b/src/trexio/LIB @@ -0,0 +1 @@ +-ltrexio diff --git a/src/zmq/LIB b/src/zmq/LIB new file mode 100644 index 00000000..ad8f4d2c --- /dev/null +++ b/src/zmq/LIB @@ -0,0 +1 @@ +-lf77zmq -lzmq From c44d624ceb7c4bd4ff1e1721092836a9b8f85739 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 19 Sep 2023 15:19:33 +0200 Subject: [PATCH 303/337] Added Jastrow parameters in EZFIO --- scripts/ezfio_interface/ei_handler.py | 4 ++ src/jastrow/EZFIO.cfg | 69 +++++++++++++++++++++++++++ src/jastrow/NEED | 2 + src/jastrow/README.md | 3 ++ src/non_h_ints_mu/NEED | 1 + src/non_h_ints_mu/qmckl.irp.f | 60 +++++------------------ src/nuclei/EZFIO.cfg | 7 +-- 7 files changed, 96 insertions(+), 50 deletions(-) create mode 100644 src/jastrow/EZFIO.cfg create mode 100644 src/jastrow/NEED create mode 100644 src/jastrow/README.md diff --git a/scripts/ezfio_interface/ei_handler.py b/scripts/ezfio_interface/ei_handler.py index fd514ace..3af43883 100755 --- a/scripts/ezfio_interface/ei_handler.py +++ b/scripts/ezfio_interface/ei_handler.py @@ -829,4 +829,8 @@ if __name__ == "__main__": # _| for (m, dict_ezfio_cfg) in l_dict_ezfio_cfg: + if dict_ezfio_cfg == {}: + print("Error: Empty EZFIO.cfg in ", arguments["--path_module"]) + sys.exit(-1) code_generation(arguments, dict_ezfio_cfg, m) + diff --git a/src/jastrow/EZFIO.cfg b/src/jastrow/EZFIO.cfg new file mode 100644 index 00000000..b41185a3 --- /dev/null +++ b/src/jastrow/EZFIO.cfg @@ -0,0 +1,69 @@ +[jast_type] +doc: Type of Jastrow [None| Mu | Qmckl] +type: character*(32) +interface: ezfio, provider, ocaml +default: None + +[jast_qmckl_type_nucl_num] +doc: Number of different nuclei types in QMCkl jastrow +type: integer +interface: ezfio, provider + +[jast_qmckl_type_nucl_vector] +doc: Nucleus type in QMCkl jastrow +type: integer +size: (nuclei.nucl_num) +interface: ezfio, provider + +[jast_qmckl_rescale_ee] +doc: Rescaling factor for electron-electron in QMCkl Jastrow +type: double precision +interface: ezfio, provider + +[jast_qmckl_rescale_en] +doc: Rescaling factor for electron-nucleus in QMCkl Jastrow +type: double precision +size: (jastrow.jast_qmckl_type_nucl_num) +interface: ezfio, provider + +[jast_qmckl_aord_num] +doc: Order of polynomials in e-n parameters of QMCkl jastrow +type: integer +interface: ezfio, provider + +[jast_qmckl_bord_num] +doc: Order of polynomials in e-e parameters of QMCkl jastrow +type: integer +interface: ezfio, provider + +[jast_qmckl_cord_num] +doc: Order of polynomials in e-e-n parameters of QMCkl jastrow +type: integer +interface: ezfio, provider + +[jast_qmckl_c_vector_size] +doc: Number of parameters for c_vector +type: integer +interface: ezfio, provider + +[jast_qmckl_a_vector] +doc: electron-nucleus parameters in QMCkl Jastrow +type: double precision +size: (jastrow.jast_qmckl_type_nucl_num*jastrow.jast_qmckl_aord_num+jastrow.jast_qmckl_type_nucl_num) +interface: ezfio, provider + +[jast_qmckl_b_vector] +doc: electron-electron parameters in QMCkl Jastrow +type: double precision +size: (jastrow.jast_qmckl_bord_num+1) +interface: ezfio, provider + +[jast_qmckl_c_vector] +doc: electron-electron-nucleus parameters in QMCkl Jastrow +type: double precision +size: (jastrow.jast_qmckl_c_vector_size) +interface: ezfio, provider + + + + diff --git a/src/jastrow/NEED b/src/jastrow/NEED new file mode 100644 index 00000000..f03c11fd --- /dev/null +++ b/src/jastrow/NEED @@ -0,0 +1,2 @@ +nuclei +electrons diff --git a/src/jastrow/README.md b/src/jastrow/README.md new file mode 100644 index 00000000..aefb6ad5 --- /dev/null +++ b/src/jastrow/README.md @@ -0,0 +1,3 @@ +# Jastrow + +Information relative to the Jastrow factor in trans-correlated calculations. diff --git a/src/non_h_ints_mu/NEED b/src/non_h_ints_mu/NEED index ecde6390..c44c65af 100644 --- a/src/non_h_ints_mu/NEED +++ b/src/non_h_ints_mu/NEED @@ -1,3 +1,4 @@ qmckl +jastrow ao_tc_eff_map bi_ortho_mos diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f index d83de4dc..c9a9a55d 100644 --- a/src/non_h_ints_mu/qmckl.irp.f +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -26,77 +26,43 @@ BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] ! Jastrow parameters - rc = qmckl_set_jastrow_champ_type_nucl_num (qmckl_ctx_jastrow, 2_8) + rc = qmckl_set_jastrow_champ_type_nucl_num(qmckl_ctx_jastrow, 1_8*jast_qmckl_type_nucl_num) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_type_nucl_vector (qmckl_ctx_jastrow, (/0_8,1_8,1_8/), 1_8*nucl_num) + rc = qmckl_set_jastrow_champ_type_nucl_vector(qmckl_ctx_jastrow, 1_8*jast_qmckl_type_nucl_vector-1_8, 1_8*nucl_num) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_rescale_factor_ee (qmckl_ctx_jastrow, 0.6d0) + rc = qmckl_set_jastrow_champ_rescale_factor_ee(qmckl_ctx_jastrow, jast_qmckl_rescale_ee) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_rescale_factor_en (qmckl_ctx_jastrow, (/0.6d0, 0.6d0 /), 2_8 ) + rc = qmckl_set_jastrow_champ_rescale_factor_en(qmckl_ctx_jastrow, jast_qmckl_rescale_en, 1_8*jast_qmckl_type_nucl_num) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_aord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_set_jastrow_champ_aord_num(qmckl_ctx_jastrow, jast_qmckl_aord_num*1_8) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_bord_num (qmckl_ctx_jastrow, 5_8) + rc = qmckl_set_jastrow_champ_a_vector(qmckl_ctx_jastrow, jast_qmckl_a_vector, 1_8*size(jast_qmckl_a_vector)) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_cord_num (qmckl_ctx_jastrow, 0_8) + rc = qmckl_set_jastrow_champ_bord_num(qmckl_ctx_jastrow, jast_qmckl_bord_num*1_8) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 -! double precision :: a_vector(12) = dble(& -! (/ 0.00000000, 0.00000000, -0.71168405, -0.44415699, -0.13865109, 0.07002267 , & -! 0.00000000, 0.00000000, -0.11379992, 0.04542846, 0.01696997, -0.01809299 /) ) - -! double precision :: b_vector(6) = dble(& -! (/ 0.00000000, 0.65603311, 0.14581988, 0.03138163, 0.00153156, -0.00447302 /) ) - -! double precision :: c_vector(46) = & -! (/ 1.06384279d0, -1.44303973d0, -0.92409833d0, 0.11845356d0, -0.02980776d0, & -! 1.07048863d0, 0.06009623d0, -0.01854872d0, -0.00915398d0, 0.01324198d0, & -! -0.00504959d0, -0.01202497d0, -0.00531644d0, 0.15101629d0, -0.00723831d0, & -! -0.00384182d0, -0.00295036d0, -0.00114583d0, 0.00158107d0, -0.00078107d0, & -! -0.00080000d0, -0.14140576d0, -0.00237271d0, -0.03006706d0, 0.01537009d0, & -! -0.02327226d0, 0.16502789d0, -0.01458259d0, -0.09946065d0, 0.00850029d0, & -! -0.02969361d0, -0.01159547d0, 0.00516313d0, 0.00405247d0, -0.02200886d0, & -! 0.03376709d0, 0.01277767d0, -0.01523013d0, -0.00739224d0, -0.00463953d0, & -! 0.00003174d0, -0.01421128d0, 0.00808140d0, 0.00612988d0, -0.00610632d0, & -! 0.01926215d0 /) - -! a_vector = 0.d0 -! b_vector = 0.d0 -! c_vector = 0.d0 - - double precision :: a_vector(12) = dble(& - (/ 0.00000000 , 0.00000000, -0.45105821, -0.23519218, -0.03825391, 0.10072866, & - 0.00000000 , 0.00000000, -0.06930592, -0.02909224, -0.00134650, 0.01477242 /) ) - - double precision :: b_vector(6) = dble(& - (/ 0.00000000, 0.00000000, 0.29217862, -0.00450671, -0.02925982, -0.01381532 /) ) - - double precision :: c_vector(46) - c_vector = 0.d0 - - rc = qmckl_set_jastrow_champ_a_vector(qmckl_ctx_jastrow, a_vector, 12_8) + rc = qmckl_set_jastrow_champ_b_vector(qmckl_ctx_jastrow, jast_qmckl_b_vector, 1_8*size(jast_qmckl_b_vector)) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 - rc = qmckl_set_jastrow_champ_b_vector(qmckl_ctx_jastrow, b_vector, 6_8) - rc = qmckl_check(qmckl_ctx_jastrow, rc) - if (rc /= QMCKL_SUCCESS) stop -1 -! rc = qmckl_set_jastrow_champ_c_vector(qmckl_ctx_jastrow, c_vector, 46_8) -! rc = qmckl_check(qmckl_ctx_jastrow, rc) -! if (rc /= QMCKL_SUCCESS) stop -1 + if (jast_qmckl_cord_num > 0) then + rc = qmckl_set_jastrow_champ_c_vector(qmckl_ctx_jastrow, jast_qmckl_c_vector, 1_8*jast_qmckl_c_vector_size) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + endif END_PROVIDER diff --git a/src/nuclei/EZFIO.cfg b/src/nuclei/EZFIO.cfg index 060eede6..20c63932 100644 --- a/src/nuclei/EZFIO.cfg +++ b/src/nuclei/EZFIO.cfg @@ -5,7 +5,7 @@ interface: ezfio, provider [nucl_label] doc: Nuclear labels -type: character*(32) +type: character*(32) size: (nuclei.nucl_num) interface: ezfio, provider @@ -17,7 +17,7 @@ interface: ezfio, provider [nucl_coord] doc: Nuclear coordinates in the format (:, {x,y,z}) -type: double precision +type: double precision size: (nuclei.nucl_num,3) interface: ezfio @@ -37,11 +37,12 @@ type: logical doc: If true, the calculation uses periodic boundary conditions interface: ezfio, provider, ocaml default: false + [n_pts_charge] type: integer doc: Number of point charges to be added to the potential interface: ezfio -default: 0 +default: 0 [pts_charge_z] type: double precision From 4f78610432cf6e14e44d8e8cb38e99ae01d0806a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 20 Sep 2023 15:16:10 +0200 Subject: [PATCH 304/337] Fix locks in openmp --- src/tc_bi_ortho/slater_tc_opt.irp.f | 31 +++++++++++++++-------------- src/tc_bi_ortho/tc_hmat.irp.f | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index f7c9b7b3..9901a853 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -4,10 +4,11 @@ subroutine provide_all_three_ints_bi_ortho() BEGIN_DOC - ! routine that provides all necessary three-electron integrals + ! routine that provides all necessary three-electron integrals END_DOC implicit none + PROVIDE ao_two_e_integrals_in_map if(three_body_h_tc) then @@ -17,14 +18,14 @@ subroutine provide_all_three_ints_bi_ortho() endif if(three_e_4_idx_term) then - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort endif if(pure_three_body_h_tc)then provide three_body_ints_bi_ort endif if(.not. double_normal_ord .and. three_e_5_idx_term) then - PROVIDE three_e_5_idx_direct_bi_ort + PROVIDE three_e_5_idx_direct_bi_ort elseif(double_normal_ord .and. (.not. three_e_5_idx_term)) then PROVIDE normal_two_body_bi_orth endif @@ -44,9 +45,9 @@ subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! - ! Returns the total matrix element + ! Returns the total matrix element !! WARNING !! - ! + ! ! Non hermitian !! ! END_DOC @@ -69,9 +70,9 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! - ! Returns the detail of the matrix element in terms of single, two and three electron contribution. + ! Returns the detail of the matrix element in terms of single, two and three electron contribution. !! WARNING !! - ! + ! ! Non hermitian !! ! END_DOC @@ -82,7 +83,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) double precision, intent(out) :: hmono, htwoe, hthree, htot - integer :: degree + integer :: degree hmono = 0.d0 htwoe = 0.d0 @@ -94,7 +95,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, if(.not.pure_three_body_h_tc) then if(degree .gt. 2) return - + if(degree == 0) then call diag_htilde_mu_mat_fock_bi_ortho (Nint, key_i, hmono, htwoe, hthree, htot) else if (degree == 1) then @@ -103,7 +104,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot) endif - else + else if(degree .gt. 3) return @@ -122,7 +123,7 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, if(degree==0) then htot += nuclear_repulsion endif - + end ! --- @@ -133,9 +134,9 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) ! ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis !! - ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS + ! Returns the detail of the matrix element WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS !! WARNING !! - ! + ! ! Non hermitian !! ! END_DOC @@ -146,7 +147,7 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) integer, intent(in) :: Nint integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2) double precision, intent(out) :: htot - integer :: degree + integer :: degree htot = 0.d0 @@ -164,7 +165,7 @@ subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot) if(degree==0) then htot += nuclear_repulsion endif - + end ! --- diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 5fb0a620..88652caa 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -15,7 +15,7 @@ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)] integer :: i, j double precision :: htot - PROVIDE N_int + call provide_all_three_ints_bi_ortho i = 1 j = 1 From 11660aa7ba16f10a00ad4cf390c37ce27d6a1b9e Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 21 Sep 2023 15:24:51 +0200 Subject: [PATCH 305/337] updated dft_utils_func/on_top_from_ueg.irp.f --- src/dft_utils_func/on_top_from_ueg.irp.f | 48 ++++++++++++++++++++++++ src/tc_scf/rh_tcscf_diis.irp.f | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/dft_utils_func/on_top_from_ueg.irp.f b/src/dft_utils_func/on_top_from_ueg.irp.f index 5b964a03..4e28ad89 100644 --- a/src/dft_utils_func/on_top_from_ueg.irp.f +++ b/src/dft_utils_func/on_top_from_ueg.irp.f @@ -146,3 +146,51 @@ end end subroutine g0_dg0 + subroutine g0_dg0_d2g0(rho, rho_a, rho_b, g0, dg0drho, d2g0drho2) + + implicit none + BEGIN_DOC + ! Give the on-top pair distribution function g0 second derivative according to rho d2g0drho2 + END_DOC + + double precision, intent (in) :: rho, rho_a, rho_b + double precision, intent (out) :: g0, dg0drho, d2g0drho2 + double precision :: pi + double precision :: g0_UEG_mu_inf, dg0drs, d2g0drs2, d2rsdrho2 + double precision :: C1, F1, D1, E1, B1, rs + + pi = dacos(-1.d0) + C1 = 0.0819306d0 + F1 = 0.752411d0 + D1 = -0.0127713d0 + E1 = 0.00185898d0 + B1 = 0.7317d0 - F1 + if(dabs(rho).gt.1.d-20)then + rs = (3.d0 / (4.d0*pi*rho))**(1.d0/3.d0) + else + rs = (3.d0 / (4.d0*pi*1.d-20))**(1.d0/3.d0) + endif + + g0 = g0_UEG_mu_inf(rho_a, rho_b) + if(dabs(F1*rs).lt.50.d0)then + dg0drs = 0.5d0*((-B1 + 2.d0*C1*rs + 3.d0*D1*rs**2 + 4.d0*E1*rs**3)-F1*(1.d0 - B1*rs + C1*rs**2 + D1*rs**3 + E1*rs**4))*dexp(-F1*rs) + d2g0drs2 = 0.5d0*((2.d0*C1 + 6.d0*D1*rs + 12*E1*rs**2) - 2.d0*F1*(-B1 + 2.d0*C1*rs + 3.d0*D1*rs**2 + 4.d0*E1*rs**3)& + &+ (F1**2)*(1.d0 - B1*rs + C1*rs**2 + D1*rs**3 + E1*rs**4))*dexp(-F1*rs) + else + dg0drs = 0.d0 + d2g0drs2 = 0.d0 + endif + + if(dabs(rho).gt.1.d-20)then + dg0drho = -((6.d0*dsqrt(pi)*rho**2)**(-2.d0/3.d0))*dg0drs + d2rsdrho2 = -8.d0*dsqrt(pi)*rho*(6.d0*dsqrt(pi)*rho**2)**(-5.d0/3.d0) + d2g0drho2 = dg0drho*d2rsdrho2 -((6.d0*dsqrt(pi)*rho**2)**(-4.d0/3.d0))*d2g0drs2 + else + dg0drho = -((6.d0*dsqrt(pi)*1.d-40)**(-2.d0/3.d0))*dg0drs + d2rsdrho2 = -8.d0*dsqrt(pi)*(1.d-20)*(6.d0*dsqrt(pi)*1.d-40)**(-5.d0/3.d0) + d2g0drho2 = dg0drho*d2rsdrho2 - ((6.d0*dsqrt(pi)*1.d-40)**(-4.d0/3.d0))*d2g0drs2 + endif + + end subroutine g0_dg0 + + diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 0504373c..c7f35451 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -245,7 +245,7 @@ subroutine rh_tcscf_diis() write(json_unit, json_real_fmt) ' delta Energy ', e_delta write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf - write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_int_fmtx) ' DIIS ', dim_DIIS write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 call unlock_io From 2c6bdd9bb9b8789b5350b350eaeae9192cd2a2f0 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 22 Sep 2023 16:25:56 +0200 Subject: [PATCH 306/337] Update EZFIO, and qp_import_trexio for UHF --- external/ezfio | 2 +- scripts/qp_import_trexio.py | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/external/ezfio b/external/ezfio index d5805497..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 142411a6..e2e8fae2 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -133,7 +133,7 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower() in ["gaussian", "slater"]: + if basis_type.lower()[0] in ["g", "s"]: shell_num = trexio.read_basis_shell_num(trexio_file) prim_num = trexio.read_basis_prim_num(trexio_file) ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) @@ -454,19 +454,33 @@ def write_ezfio(trexio_filename, filename): else: print("None") - print("Determinant\t\t...\t", end=' ') + print("Determinant\t...\t", end=' ') alpha = [ i for i in range(num_alpha) ] beta = [ i for i in range(num_beta) ] if trexio.has_mo_spin(trexio_file): spin = trexio.read_mo_spin(trexio_file) if max(spin) == 1: - beta = [ i for i in range(mo_num) if spin[i] == 1 ] + alpha = [ i for i in range(len(spin)) if spin[i] == 0 ] + alpha = [ alpha[i] for i in range(num_alpha) ] + beta = [ i for i in range(len(spin)) if spin[i] == 1 ] beta = [ beta[i] for i in range(num_beta) ] - - alpha = qp_bitmasks.BitMask(alpha) - beta = qp_bitmasks.BitMask(beta ) - print(alpha) - print(beta) + print("Warning -- UHF orbitals --", end=' ') + alpha_s = ['0']*mo_num + beta_s = ['0']*mo_num + for i in alpha: + alpha_s[i] = '1' + for i in beta: + beta_s[i] = '1' + alpha_s = ''.join(alpha_s)[::-1] + beta_s = ''.join(beta_s)[::-1] + alpha = [ int(i,2) for i in qp_bitmasks.string_to_bitmask(alpha_s) ][::-1] + beta = [ int(i,2) for i in qp_bitmasks.string_to_bitmask(beta_s ) ][::-1] + ezfio.set_determinants_bit_kind(8) + ezfio.set_determinants_n_int(1+mo_num//64) + ezfio.set_determinants_n_det(1) + ezfio.set_determinants_n_states(1) + ezfio.set_determinants_psi_det(alpha+beta) + ezfio.set_determinants_psi_coef([[1.0]]) print("OK") From 4cbe630ec5ecfa93881f7f968b3adc6488ac3888 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 22 Sep 2023 16:26:58 +0200 Subject: [PATCH 307/337] Add many call provide in TC --- configure | 6 +- src/bi_ort_ints/three_body_ijm.irp.f | 9 +- src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 3 + src/non_h_ints_mu/jast_deriv.irp.f | 122 +++++++++++-------- src/non_h_ints_mu/qmckl.irp.f | 4 + src/tc_bi_ortho/tc_bi_ortho.irp.f | 11 ++ src/tc_scf/tc_petermann_factor.irp.f | 26 +++- 7 files changed, 118 insertions(+), 63 deletions(-) diff --git a/configure b/configure index f64e241f..1535948d 100755 --- a/configure +++ b/configure @@ -213,7 +213,7 @@ EOF wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} - ./configure --prefix=\${QP_ROOT} --without-hdf5 + ./configure --prefix=\${QP_ROOT} --without-hdf5 CFLAGS='-g' make -j 8 && make -j 8 check && make -j 8 install tar -zxvf "\${QP_ROOT}"/external/qp2-dependencies/${ARCHITECTURE}/ninja.tar.gz mv ninja "\${QP_ROOT}"/bin/ @@ -226,7 +226,7 @@ EOF wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} - ./configure --prefix=\${QP_ROOT} + ./configure --prefix=\${QP_ROOT} CFLAGS="-g" make -j 8 && make -j 8 check && make -j 8 install EOF elif [[ ${PACKAGE} = qmckl ]] ; then @@ -237,7 +237,7 @@ EOF wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz tar -zxf qmckl-${VERSION}.tar.gz && rm qmckl-${VERSION}.tar.gz cd qmckl-${VERSION} - ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc + ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc CFLAGS='-g' make && make -j 4 check && make install EOF diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f index 5de33a76..cc1b6ea0 100644 --- a/src/bi_ort_ints/three_body_ijm.irp.f +++ b/src/bi_ort_ints/three_body_ijm.irp.f @@ -18,12 +18,13 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, double precision :: integral, wall1, wall0 PROVIDE mo_l_coef mo_r_coef + provide mos_r_in_r_array_transp mos_l_in_r_array_transp three_e_3_idx_direct_bi_ort = 0.d0 print *, ' Providing the three_e_3_idx_direct_bi_ort ...' call wall_time(wall0) - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & @@ -79,6 +80,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & @@ -135,6 +137,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & @@ -191,6 +194,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & @@ -247,6 +251,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & @@ -303,6 +308,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & @@ -349,6 +355,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_ provide mos_r_in_r_array_transp mos_l_in_r_array_transp + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, integral) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,m,integral) & diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index c30b9f25..726e48ba 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -29,6 +29,9 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n !provide x_W_ki_bi_ortho_erf_rk provide mos_r_in_r_array_transp mos_l_in_r_array_transp + provide int2_grad1_u12_ao_transp final_grid_points int2_grad1_u12_bimo_t + provide mo_l_coef mo_r_coef mos_l_in_r_array_transp mos_r_in_r_array_transp n_points_final_grid + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 6b8445b1..7a4717f7 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -51,7 +51,7 @@ r1(2) = final_grid_points(2,ipoint) r1(3) = final_grid_points(3,ipoint) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -63,9 +63,9 @@ dy = grad1_u2b(2) dz = grad1_u2b(3) - grad1_u12_num(jpoint,ipoint,1) = dx - grad1_u12_num(jpoint,ipoint,2) = dy - grad1_u12_num(jpoint,ipoint,3) = dz + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz enddo @@ -92,7 +92,7 @@ v1b_r1 = j1b_nucl(r1) call grad1_j1b_nucl(r1, grad1_v1b) - do jpoint = 1, n_points_extra_final_grid ! r2 + do jpoint = 1, n_points_extra_final_grid ! r2 r2(1) = final_grid_points_extra(1,jpoint) r2(2) = final_grid_points_extra(2,jpoint) @@ -106,9 +106,9 @@ dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 - grad1_u12_num(jpoint,ipoint,1) = dx - grad1_u12_num(jpoint,ipoint,2) = dy - grad1_u12_num(jpoint,ipoint,3) = dz + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz enddo @@ -121,85 +121,101 @@ double precision :: f f = 1.d0 / dble(elec_num - 1) + integer*8 :: n_points, k + n_points = n_points_extra_final_grid * n_points_final_grid + double precision, allocatable :: rij(:,:,:) - allocate( rij(3, 2, n_points_extra_final_grid) ) + allocate( rij(3, 2, n_points) ) use qmckl integer(qmckl_exit_code) :: rc - integer*8 :: npoints - npoints = n_points_extra_final_grid - double precision, allocatable :: gl(:,:,:) - allocate( gl(2,4,n_points_extra_final_grid) ) + allocate( gl(2,4,n_points) ) + k=0 do ipoint = 1, n_points_final_grid ! r1 - - do jpoint = 1, n_points_extra_final_grid ! r2 - rij(1:3, 1, jpoint) = final_grid_points (1:3, ipoint) - rij(1:3, 2, jpoint) = final_grid_points_extra(1:3, jpoint) + do jpoint = 1, n_points_extra_final_grid ! r2 + k=k+1 + rij(1:3, 1, k) = final_grid_points (1:3, ipoint) + rij(1:3, 2, k) = final_grid_points_extra(1:3, jpoint) enddo + enddo - rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', npoints, rij, npoints*6_8) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, 'qmckl error in set_electron_coord' - stop -1 - endif + rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', n_points, rij, n_points*6_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_coord' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - ! --- - ! e-e term + ! --- + ! e-e term - rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, 'qmckl error in fact_ee_gl' - stop -1 - endif + rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, ' qmckl error in fact_ee_gl' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - do jpoint = 1, n_points_extra_final_grid ! r2 - grad1_u12_num(jpoint,ipoint,1) = gl(1,1,jpoint) - grad1_u12_num(jpoint,ipoint,2) = gl(1,2,jpoint) - grad1_u12_num(jpoint,ipoint,3) = gl(1,3,jpoint) + k=0 + do ipoint = 1, n_points_final_grid ! r1 + do jpoint = 1, n_points_extra_final_grid ! r2 + k=k+1 + grad1_u12_num(jpoint,ipoint,1) = gl(1,1,k) + grad1_u12_num(jpoint,ipoint,2) = gl(1,2,k) + grad1_u12_num(jpoint,ipoint,3) = gl(1,3,k) enddo + enddo ! --- ! e-e-n term -! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) +! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) ! if (rc /= QMCKL_SUCCESS) then ! print *, irp_here, 'qmckl error in fact_een_gl' +! rc = qmckl_check(qmckl_ctx_jastrow, rc) ! stop -1 ! endif ! -! do jpoint = 1, n_points_extra_final_grid ! r2 -! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,jpoint) -! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,jpoint) -! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,jpoint) +! k=0 +! do ipoint = 1, n_points_final_grid ! r1 +! do jpoint = 1, n_points_extra_final_grid ! r2 +! k=k+1 +! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,k) +! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,k) +! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,k) +! enddo ! enddo ! --- ! e-n term - rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*npoints) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, 'qmckl error in fact_en_gl' - stop -1 - endif + rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_en_gl' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - do jpoint = 1, n_points_extra_final_grid ! r2 - grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,jpoint) - grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,jpoint) - grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,jpoint) + k=0 + do ipoint = 1, n_points_final_grid ! r1 + do jpoint = 1, n_points_extra_final_grid ! r2 + k = k+1 + grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,k) + grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,k) + grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,k) enddo - do jpoint = 1, n_points_extra_final_grid ! r2 - dx = grad1_u12_num(jpoint,ipoint,1) - dy = grad1_u12_num(jpoint,ipoint,2) - dz = grad1_u12_num(jpoint,ipoint,3) + do jpoint = 1, n_points_extra_final_grid ! r2 + dx = grad1_u12_num(jpoint,ipoint,1) + dy = grad1_u12_num(jpoint,ipoint,2) + dz = grad1_u12_num(jpoint,ipoint,3) grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz enddo - enddo deallocate(gl, rij) @@ -212,7 +228,7 @@ endif call wall_time(time1) - print*, ' Wall time for grad1_u12_num & grad1_u12_squared_num (min) =', (time1-time0)/60.d0 + print*, ' Wall time for grad1_u12_num & grad1_u12_squared_num (min) =', (time1-time0)/60.d0 END_PROVIDER diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f index c9a9a55d..128c83c6 100644 --- a/src/non_h_ints_mu/qmckl.irp.f +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -59,6 +59,10 @@ BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] if (rc /= QMCKL_SUCCESS) stop -1 + rc = qmckl_set_jastrow_champ_cord_num(qmckl_ctx_jastrow, jast_qmckl_cord_num*1_8) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + if (jast_qmckl_cord_num > 0) then rc = qmckl_set_jastrow_champ_c_vector(qmckl_ctx_jastrow, jast_qmckl_c_vector, 1_8*jast_qmckl_c_vector_size) rc = qmckl_check(qmckl_ctx_jastrow, rc) diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2887c7be..e27672a2 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -13,6 +13,17 @@ program tc_bi_ortho my_n_pt_a_grid = tc_grid1_a touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + read_wf = .True. touch read_wf diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f index 2e9c67e2..14fff898 100644 --- a/src/tc_scf/tc_petermann_factor.irp.f +++ b/src/tc_scf/tc_petermann_factor.irp.f @@ -30,9 +30,22 @@ subroutine main() allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) - call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & - , 0.d0, Sl, size(Sl, 1) ) + + call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_r_coef, Sl) + !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + ! , 0.d0, Sl, size(Sl, 1) ) + + print *, '' + print *, ' left-right orthog matrix:' + do i = 1, mo_num + write(*,'(100(F8.4,X))') Sl(:,i) + enddo + + call LTxSxR(ao_num, mo_num, mo_l_coef, ao_overlap, mo_l_coef, Sl) + !call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + ! , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + ! , 0.d0, Sl, size(Sl, 1) ) print *, '' print *, ' left-orthog matrix:' @@ -40,9 +53,10 @@ subroutine main() write(*,'(100(F8.4,X))') Sl(:,i) enddo - call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & - , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & - , 0.d0, Sr, size(Sr, 1) ) + call LTxSxR(ao_num, mo_num, mo_r_coef, ao_overlap, mo_r_coef, Sr) +! call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & +! , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & +! , 0.d0, Sr, size(Sr, 1) ) print *, '' print *, ' right-orthog matrix:' From a53f195791d135782d4cf63ef1509e8e98dcfc26 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 22 Sep 2023 17:14:16 +0200 Subject: [PATCH 308/337] fixed config files --- config/flang_avx.cfg | 62 +++++++++++++++++++++++++++++++++ config/gfortran10.cfg | 62 +++++++++++++++++++++++++++++++++ config/gfortran_mpi_mkl.cfg | 62 +++++++++++++++++++++++++++++++++ config/ifort_2019_avx_notz.cfg | 63 ++++++++++++++++++++++++++++++++++ config/ifort_2019_debug.cfg | 2 +- config/ifort_2021_avx_notz.cfg | 63 ++++++++++++++++++++++++++++++++++ config/ifort_2021_debug.cfg | 2 +- 7 files changed, 314 insertions(+), 2 deletions(-) create mode 100644 config/flang_avx.cfg create mode 100644 config/gfortran10.cfg create mode 100644 config/gfortran_mpi_mkl.cfg create mode 100644 config/ifort_2019_avx_notz.cfg create mode 100644 config/ifort_2021_avx_notz.cfg diff --git a/config/flang_avx.cfg b/config/flang_avx.cfg new file mode 100644 index 00000000..625c3843 --- /dev/null +++ b/config/flang_avx.cfg @@ -0,0 +1,62 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : flang -ffree-line-length-none -I . -mavx -g -fPIC +LAPACK_LIB : -llapack -lblas +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -mavx + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/gfortran10.cfg b/config/gfortran10.cfg new file mode 100644 index 00000000..03eaccd1 --- /dev/null +++ b/config/gfortran10.cfg @@ -0,0 +1,62 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : gfortran-10 -g -ffree-line-length-none -I . -fPIC +LAPACK_LIB : -lblas -llapack +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : DEBUG ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -g -msse4.2 -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/gfortran_mpi_mkl.cfg b/config/gfortran_mpi_mkl.cfg new file mode 100644 index 00000000..7cc88f1f --- /dev/null +++ b/config/gfortran_mpi_mkl.cfg @@ -0,0 +1,62 @@ +# Common flags +############## +# +# -ffree-line-length-none : Needed for IRPF90 which produces long lines +# -lblas -llapack : Link with libblas and liblapack libraries provided by the system +# -I . : Include the curent directory (Mandatory) +# +# --ninja : Allow the utilisation of ninja. (Mandatory) +# --align=32 : Align all provided arrays on a 32-byte boundary +# +# +[COMMON] +FC : mpif90 -ffree-line-length-none -I . -g -fPIC -std=legacy +LAPACK_LIB : -lblas -llapack +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 -DMPI -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -Ofast : Disregard strict standards compliance. Enables all -O3 optimizations. +# It also enables optimizations that are not valid +# for all standard-compliant programs. It turns on +# -ffast-math and the Fortran-specific +# -fno-protect-parens and -fstack-arrays. +[OPT] +FCFLAGS : -Ofast -msse4.2 + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -Ofast -msse4.2 + +# Debugging flags +################# +# +# -fcheck=all : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# +[DEBUG] +FCFLAGS : -fcheck=all -g + +# OpenMP flags +################# +# +[OPENMP] +FC : -fopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2019_avx_notz.cfg b/config/ifort_2019_avx_notz.cfg new file mode 100644 index 00000000..f68b256d --- /dev/null +++ b/config/ifort_2019_avx_notz.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --define=WITHOUT_TRAILZ --define=WITHOUT_SHIFTRL -DSET_NESTED + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xAVX -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2019_debug.cfg b/config/ifort_2019_debug.cfg index cb14f467..8c16c4ac 100644 --- a/config/ifort_2019_debug.cfg +++ b/config/ifort_2019_debug.cfg @@ -7,7 +7,7 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL -DSET_NESTED diff --git a/config/ifort_2021_avx_notz.cfg b/config/ifort_2021_avx_notz.cfg new file mode 100644 index 00000000..1fa595d7 --- /dev/null +++ b/config/ifort_2021_avx_notz.cfg @@ -0,0 +1,63 @@ +# Common flags +############## +# +# -mkl=[parallel|sequential] : Use the MKL library +# --ninja : Allow the utilisation of ninja. It is mandatory ! +# --align=32 : Align all provided arrays on a 32-byte boundary +# +[COMMON] +FC : ifort -fpic +LAPACK_LIB : -mkl=parallel +IRPF90 : irpf90 +IRPF90_FLAGS : --ninja --align=32 --define=WITHOUT_TRAILZ --define=WITHOUT_SHIFTRL + +# Global options +################ +# +# 1 : Activate +# 0 : Deactivate +# +[OPTION] +MODE : OPT ; [ OPT | PROFILE | DEBUG ] : Chooses the section below +CACHE : 0 ; Enable cache_compile.py +OPENMP : 1 ; Append OpenMP flags + +# Optimization flags +#################### +# +# -xHost : Compile a binary optimized for the current architecture +# -O2 : O3 not better than O2. +# -ip : Inter-procedural optimizations +# -ftz : Flushes denormal results to zero +# +[OPT] +FC : -traceback +FCFLAGS : -xAVX -O2 -ip -ftz -g + +# Profiling flags +################# +# +[PROFILE] +FC : -p -g +FCFLAGS : -xSSE4.2 -O2 -ip -ftz + +# Debugging flags +################# +# +# -traceback : Activate backtrace on runtime +# -fpe0 : All floating point exaceptions +# -C : Checks uninitialized variables, array subscripts, etc... +# -g : Extra debugging information +# -xSSE2 : Valgrind needs a very simple x86 executable +# +[DEBUG] +FC : -g -traceback +FCFLAGS : -xSSE2 -C -fpe0 -implicitnone + +# OpenMP flags +################# +# +[OPENMP] +FC : -qopenmp +IRPF90_FLAGS : --openmp + diff --git a/config/ifort_2021_debug.cfg b/config/ifort_2021_debug.cfg index d70b1465..80802f33 100644 --- a/config/ifort_2021_debug.cfg +++ b/config/ifort_2021_debug.cfg @@ -7,7 +7,7 @@ # [COMMON] FC : ifort -fpic -LAPACK_LIB : -mkl=parallel -lirc -lsvml -limf -lipps +LAPACK_LIB : -mkl=parallel IRPF90 : irpf90 IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL From e8dbceb4a8e97310ee0d7d5f42845fb36b701202 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 23 Sep 2023 10:24:51 +0200 Subject: [PATCH 309/337] minor modif --- src/non_h_ints_mu/tc_integ_num.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/non_h_ints_mu/tc_integ_num.irp.f b/src/non_h_ints_mu/tc_integ_num.irp.f index ee34f531..5a088331 100644 --- a/src/non_h_ints_mu/tc_integ_num.irp.f +++ b/src/non_h_ints_mu/tc_integ_num.irp.f @@ -47,7 +47,7 @@ call total_memory(mem) mem = max(1.d0, qp_max_mem - mem) n_double = mem * 1.d8 - n_blocks = min(n_double / (n_points_extra_final_grid * 4), 1.d0*n_points_final_grid) + n_blocks = int(min(n_double / (n_points_extra_final_grid * 4.d0), 1.d0*n_points_final_grid)) n_rest = int(mod(n_points_final_grid, n_blocks)) n_pass = int((n_points_final_grid - n_rest) / n_blocks) From ede0bf7152f514f2ac05eb7f162c61534f6f59eb Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 23 Sep 2023 10:30:38 +0200 Subject: [PATCH 310/337] minor modif --- src/tc_keywords/j1b_pen.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 2d5e59a9..56bc63dc 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -91,7 +91,7 @@ print *, ' parameters for nuclei jastrow' print *, ' i, Z, j1b_pen, j1b_pen_coef' do i = 1, nucl_num - write(*,"(I4, 2x, 3(E15.7, 2X))"), i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) + write(*,"(I4, 2x, 3(E15.7, 2X))") i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) enddo END_PROVIDER From 03754f1d5f73072a9f28f4cc0268f6efa3b1ed5d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Sat, 23 Sep 2023 11:26:25 +0200 Subject: [PATCH 311/337] noL_0e in tc-scf --- src/tc_scf/fock_three_hermit.irp.f | 7 ++++++- src/tc_scf/rh_tcscf_diis.irp.f | 12 +++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/tc_scf/fock_three_hermit.irp.f b/src/tc_scf/fock_three_hermit.irp.f index 6c132189..00d47fae 100644 --- a/src/tc_scf/fock_three_hermit.irp.f +++ b/src/tc_scf/fock_three_hermit.irp.f @@ -95,7 +95,12 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf] if(.not. three_body_h_tc) then - diag_three_elem_hf = 0.d0 + if(noL_standard) then + PROVIDE noL_0e + diag_three_elem_hf = noL_0e + else + diag_three_elem_hf = 0.d0 + endif else diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 0504373c..66fc83bd 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -71,10 +71,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif + etc_3e = diag_three_elem_hf !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) @@ -202,10 +199,7 @@ subroutine rh_tcscf_diis() etc_tot = TC_HF_energy etc_1e = TC_HF_one_e_energy etc_2e = TC_HF_two_e_energy - etc_3e = 0.d0 - if(three_body_h_tc) then - etc_3e = diag_three_elem_hf - endif + etc_3e = diag_three_elem_hf !tc_grad = grad_non_hermit er_DIIS = maxval(abs(FQS_SQF_mo)) e_delta = dabs(etc_tot - e_save) @@ -245,7 +239,7 @@ subroutine rh_tcscf_diis() write(json_unit, json_real_fmt) ' delta Energy ', e_delta write(json_unit, json_real_fmt) ' DIIS error ', er_DIIS write(json_unit, json_real_fmt) ' level_shift ', level_shift_tcscf - write(json_unit, json_real_fmt) ' DIIS ', dim_DIIS + write(json_unit, json_int_fmt) ' DIIS ', dim_DIIS write(json_unit, json_real_fmt) ' Wall time (min)', (t1-t0)/60.d0 call unlock_io From cc606ba8f8f25e6ee8dad9a39d63cc8d5b52496d Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2023 14:17:00 +0200 Subject: [PATCH 312/337] Fix 5z_bfd basis --- data/basis/cc-pv5z_ecp_bfd | 696 ++++++++++++++++++------------------- 1 file changed, 348 insertions(+), 348 deletions(-) diff --git a/data/basis/cc-pv5z_ecp_bfd b/data/basis/cc-pv5z_ecp_bfd index 84b0300e..1d4cebff 100644 --- a/data/basis/cc-pv5z_ecp_bfd +++ b/data/basis/cc-pv5z_ecp_bfd @@ -1,5 +1,5 @@ ALUMINUM -s 9 1.00 +s 9 1 0.045518 0.206193 2 0.100308 0.559887 3 0.221051 0.407852 @@ -9,15 +9,15 @@ s 9 1.00 7 5.213294 -0.003935 8 11.488606 0.000470 9 25.317597 -0.000014 -s 1 1.00 +s 1 1 0.056415 1.000000 -s 1 1.00 +s 1 1 0.155063 1.000000 -s 1 1.00 +s 1 1 0.332041 1.000000 -s 1 1.00 +s 1 1 0.725343 1.000000 -p 9 1.00 +p 9 1 0.014848 0.009932 2 0.030967 0.160212 3 0.064586 0.389171 @@ -27,37 +27,37 @@ p 9 1.00 7 1.221985 -0.053293 8 2.548578 0.004846 9 5.315330 -0.000726 -p 1 1.00 +p 1 1 0.033949 1.000000 -p 1 1.00 +p 1 1 0.083154 1.000000 -p 1 1.00 +p 1 1 0.251360 1.000000 -p 1 1.00 +p 1 1 0.314422 1.000000 -d 1 1.00 +d 1 1 0.088651 1.000000 -d 1 1.00 +d 1 1 0.241216 1.000000 -d 1 1.00 +d 1 1 0.575129 1.000000 -d 1 1.00 +d 1 1 0.989127 1.000000 -f 1 1.00 +f 1 1 0.148598 1.000000 -f 1 1.00 +f 1 1 0.374850 1.000000 -f 1 1.00 +f 1 1 0.781006 1.000000 -g 1 1.00 +g 1 1 0.259548 1.000000 -g 1 1.00 +g 1 1 0.561381 1.000000 -h 1 1.00 +h 1 1 0.328731 1.000000 ARSENIC -s 9 1.00 +s 9 1 0.147347 0.155473 2 0.312164 0.494617 3 0.661339 0.526705 @@ -67,15 +67,15 @@ s 9 1.00 7 13.322677 -0.000115 8 28.224956 -0.000595 9 59.796402 0.000127 -s 1 1.00 +s 1 1 0.189594 1.000000 -s 1 1.00 +s 1 1 0.778040 1.000000 -s 1 1.00 +s 1 1 0.971266 1.000000 -s 1 1.00 +s 1 1 1.979612 1.000000 -p 9 1.00 +p 9 1 0.090580 0.079101 2 0.188085 0.260718 3 0.390548 0.395065 @@ -85,37 +85,37 @@ p 9 1.00 7 7.260371 -0.001407 8 15.075781 0.001710 9 31.304069 -0.000275 -p 1 1.00 +p 1 1 0.133916 1.000000 -p 1 1.00 +p 1 1 0.356186 1.000000 -p 1 1.00 +p 1 1 0.833562 1.000000 -p 1 1.00 +p 1 1 1.430927 1.000000 -d 1 1.00 +d 1 1 0.268113 1.000000 -d 1 1.00 +d 1 1 0.697753 1.000000 -d 1 1.00 +d 1 1 1.185366 1.000000 -d 1 1.00 +d 1 1 2.118102 1.000000 -f 1 1.00 +f 1 1 0.422461 1.000000 -f 1 1.00 +f 1 1 0.973776 1.000000 -f 1 1.00 +f 1 1 2.020616 1.000000 -g 1 1.00 +g 1 1 0.695217 1.000000 -g 1 1.00 +g 1 1 1.690111 1.000000 -h 1 1.00 +h 1 1 1.258944 1.000000 BERYLLIUM -s 9 1.00 +s 9 1 0.030068 0.025105 2 0.054002 0.178890 3 0.096986 0.263939 @@ -125,15 +125,15 @@ s 9 1.00 7 1.009077 -0.114576 8 1.812290 -0.067207 9 3.254852 0.017250 -s 1 1.00 +s 1 1 0.012778 1.000000 -s 1 1.00 +s 1 1 0.108807 1.000000 -s 1 1.00 +s 1 1 0.216157 1.000000 -s 1 1.00 +s 1 1 1.207279 1.000000 -p 9 1.00 +p 9 1 0.015064 0.735052 2 0.028584 -0.476214 3 0.054236 0.564806 @@ -143,37 +143,37 @@ p 9 1.00 7 0.703030 0.067510 8 1.333967 -0.002868 9 2.531139 0.017869 -p 1 1.00 +p 1 1 0.072561 1.000000 -p 1 1.00 +p 1 1 0.501715 1.000000 -p 1 1.00 +p 1 1 0.184471 1.000000 -p 1 1.00 +p 1 1 2.128672 1.000000 -d 1 1.00 +d 1 1 0.090175 1.000000 -d 1 1.00 +d 1 1 0.743653 1.000000 -d 1 1.00 +d 1 1 0.238494 1.000000 -d 1 1.00 +d 1 1 0.933001 1.000000 -f 1 1.00 +f 1 1 0.129140 1.000000 -f 1 1.00 +f 1 1 0.299150 1.000000 -f 1 1.00 +f 1 1 0.739023 1.000000 -g 1 1.00 +g 1 1 0.316080 1.000000 -g 1 1.00 +g 1 1 0.863442 1.000000 -h 1 1.00 +h 1 1 0.409080 1.000000 BORON -s 9 1.00 +s 9 1 0.040569 0.032031 2 0.081044 0.243317 3 0.161898 0.434636 @@ -183,15 +183,15 @@ s 9 1.00 7 2.578276 -0.098781 8 5.150520 0.016164 9 10.288990 -0.000016 -s 1 1.00 +s 1 1 0.070664 1.000000 -s 1 1.00 +s 1 1 0.170896 1.000000 -s 1 1.00 +s 1 1 0.375720 1.000000 -s 1 1.00 +s 1 1 0.614105 1.000000 -p 9 1.00 +p 9 1 0.029207 0.019909 2 0.058408 0.141775 3 0.116803 0.294463 @@ -201,37 +201,37 @@ p 9 1.00 7 1.868068 0.066454 8 3.735743 0.021248 9 7.470701 0.002837 -p 1 1.00 +p 1 1 0.057917 1.000000 -p 1 1.00 +p 1 1 0.143772 1.000000 -p 1 1.00 +p 1 1 0.436327 1.000000 -p 1 1.00 +p 1 1 0.566611 1.000000 -d 1 1.00 +d 1 1 0.134838 1.000000 -d 1 1.00 +d 1 1 0.380163 1.000000 -d 1 1.00 +d 1 1 0.808233 1.000000 -d 1 1.00 +d 1 1 1.022256 1.000000 -f 1 1.00 +f 1 1 0.272717 1.000000 -f 1 1.00 +f 1 1 0.799174 1.000000 -f 1 1.00 +f 1 1 1.002171 1.000000 -g 1 1.00 +g 1 1 0.486131 1.000000 -g 1 1.00 +g 1 1 0.824366 1.000000 -h 1 1.00 +h 1 1 0.632779 1.000000 CHLORINE -s 9 1.00 +s 9 1 0.119944 0.148917 2 0.257348 0.503616 3 0.552157 0.523995 @@ -241,15 +241,15 @@ s 9 1.00 7 11.701243 -0.001301 8 25.105812 -0.000294 9 53.866226 0.000076 -s 1 1.00 +s 1 1 0.152049 1.000000 -s 1 1.00 +s 1 1 0.639110 1.000000 -s 1 1.00 +s 1 1 0.801438 1.000000 -s 1 1.00 +s 1 1 1.671380 1.000000 -p 9 1.00 +p 9 1 0.074374 0.084925 2 0.155084 0.270658 3 0.323378 0.396022 @@ -259,37 +259,37 @@ p 9 1.00 7 6.113450 -0.000951 8 12.747651 0.001501 9 26.581165 -0.000249 -p 1 1.00 +p 1 1 0.103926 1.000000 -p 1 1.00 +p 1 1 0.275582 1.000000 -p 1 1.00 +p 1 1 0.667436 1.000000 -p 1 1.00 +p 1 1 1.171614 1.000000 -d 1 1.00 +d 1 1 0.237419 1.000000 -d 1 1.00 +d 1 1 0.729517 1.000000 -d 1 1.00 +d 1 1 0.924049 1.000000 -d 1 1.00 +d 1 1 1.522182 1.000000 -f 1 1.00 +f 1 1 0.335123 1.000000 -f 1 1.00 +f 1 1 0.789116 1.000000 -f 1 1.00 +f 1 1 1.609975 1.000000 -g 1 1.00 +g 1 1 0.576133 1.000000 -g 1 1.00 +g 1 1 1.402971 1.000000 -h 1 1.00 +h 1 1 1.099609 1.000000 CARBON -s 9 1.00 +s 9 1 0.051344 0.013991 2 0.102619 0.169852 3 0.205100 0.397529 @@ -299,15 +299,15 @@ s 9 1.00 7 3.272791 -0.121499 8 6.541187 0.015176 9 13.073594 -0.000705 -s 1 1.00 +s 1 1 0.098302 1.000000 -s 1 1.00 +s 1 1 0.232034 1.000000 -s 1 1.00 +s 1 1 0.744448 1.000000 -s 1 1.00 +s 1 1 1.009914 1.000000 -p 9 1.00 +p 9 1 0.029281 0.001787 2 0.058547 0.050426 3 0.117063 0.191634 @@ -317,37 +317,37 @@ p 9 1.00 7 1.871016 0.112024 8 3.741035 0.054425 9 7.480076 0.021931 -p 1 1.00 +p 1 1 0.084047 1.000000 -p 1 1.00 +p 1 1 0.216618 1.000000 -p 1 1.00 +p 1 1 0.576869 1.000000 -p 1 1.00 +p 1 1 1.006252 1.000000 -d 1 1.00 +d 1 1 0.206619 1.000000 -d 1 1.00 +d 1 1 0.606933 1.000000 -d 1 1.00 +d 1 1 1.001526 1.000000 -d 1 1.00 +d 1 1 1.504882 1.000000 -f 1 1.00 +f 1 1 0.400573 1.000000 -f 1 1.00 +f 1 1 1.099564 1.000000 -f 1 1.00 +f 1 1 1.501091 1.000000 -g 1 1.00 +g 1 1 0.797648 1.000000 -g 1 1.00 +g 1 1 1.401343 1.000000 -h 1 1.00 +h 1 1 1.001703 1.000000 FLUORINE -s 9 1.00 +s 9 1 0.172723 0.070240 2 0.364875 0.311088 3 0.770795 0.444675 @@ -357,15 +357,15 @@ s 9 1.00 7 15.350300 0.009104 8 32.427348 0.000810 9 68.502433 -0.000133 -s 1 1.00 +s 1 1 0.191146 1.000000 -s 1 1.00 +s 1 1 0.459697 1.000000 -s 1 1.00 +s 1 1 1.250265 1.000000 -s 1 1.00 +s 1 1 2.542428 1.000000 -p 9 1.00 +p 9 1 0.101001 0.035321 2 0.204414 0.136924 3 0.413707 0.249353 @@ -375,37 +375,37 @@ p 9 1.00 7 6.941026 0.088542 8 14.047737 0.039843 9 28.430799 0.003378 -p 1 1.00 +p 1 1 0.170574 1.000000 -p 1 1.00 +p 1 1 0.489019 1.000000 -p 1 1.00 +p 1 1 1.505085 1.000000 -p 1 1.00 +p 1 1 2.018698 1.000000 -d 1 1.00 +d 1 1 0.517711 1.000000 -d 1 1.00 +d 1 1 1.523306 1.000000 -d 1 1.00 +d 1 1 3.901897 1.000000 -d 1 1.00 +d 1 1 5.603581 1.000000 -f 1 1.00 +f 1 1 0.981494 1.000000 -f 1 1.00 +f 1 1 2.950321 1.000000 -f 1 1.00 +f 1 1 4.297889 1.000000 -g 1 1.00 +g 1 1 1.638933 1.000000 -g 1 1.00 +g 1 1 4.619953 1.000000 -h 1 1.00 +h 1 1 2.963127 1.000000 HELIUM -s 9 1.00 +s 9 1 0.077786 0.012425 2 0.161528 0.128251 3 0.335425 0.282221 @@ -415,15 +415,15 @@ s 9 1.00 7 6.237154 0.064912 8 12.951926 0.038892 9 26.895662 0.002531 -s 1 1.00 +s 1 1 1.324312 1.000000 -s 1 1.00 +s 1 1 0.876976 1.000000 -s 1 1.00 +s 1 1 0.294075 1.000000 -s 1 1.00 +s 1 1 0.116506 1.000000 -p 8 1.00 +p 8 1 0.228528 -0.000116 2 0.422019 2.116950 3 0.779333 -2.182954 @@ -432,27 +432,27 @@ p 8 1.00 6 4.907934 0.469710 7 9.063386 -0.224631 8 16.737180 0.098422 -p 1 1.00 +p 1 1 6.741009 1.000000 -p 1 1.00 +p 1 1 2.647340 1.000000 -p 1 1.00 +p 1 1 0.893850 1.000000 -d 1 1.00 +d 1 1 1.842278 1.000000 -d 1 1.00 +d 1 1 2.175208 1.000000 -d 1 1.00 +d 1 1 4.285515 1.000000 -f 1 1.00 +f 1 1 0.749734 1.000000 -f 1 1.00 +f 1 1 1.632074 1.000000 -g 1 1.00 +g 1 1 0.623669 1.000000 HYDROGEN -s 9 1.00 +s 9 1 0.013000 0.000706 2 0.029900 -0.002119 3 0.068770 0.057693 @@ -462,15 +462,15 @@ s 9 1.00 7 1.924458 0.097443 8 4.426254 0.029966 9 10.180385 -0.000452 -s 1 1.00 +s 1 1 0.122344 1.000000 -s 1 1.00 +s 1 1 0.402892 1.000000 -s 1 1.00 +s 1 1 0.715047 1.000000 -s 1 1.00 +s 1 1 1.379838 1.000000 -p 9 1.00 +p 9 1 0.003000 0.001242 2 0.007800 -0.000913 3 0.020281 -0.000054 @@ -480,27 +480,27 @@ p 9 1.00 7 0.926774 -0.013929 8 2.409612 -0.009395 9 6.264991 -0.000347 -p 1 1.00 +p 1 1 0.784765 1.000000 -p 1 1.00 +p 1 1 0.173606 1.000000 -p 1 1.00 +p 1 1 0.513665 1.000000 -d 1 1.00 +d 1 1 2.917388 1.000000 -d 1 1.00 +d 1 1 0.466379 1.000000 -d 1 1.00 +d 1 1 1.132171 1.000000 -f 1 1.00 +f 1 1 1.649608 1.000000 -f 1 1.00 +f 1 1 0.793185 1.000000 -g 1 1.00 +g 1 1 1.606813 1.000000 LITHIUM -s 9 1.00 +s 9 1 0.010125 0.007841 2 0.023437 0.258118 3 0.054251 0.423307 @@ -510,15 +510,15 @@ s 9 1.00 7 1.557659 0.007736 8 3.605689 0.003630 9 8.346494 -0.000646 -s 1 1.00 +s 1 1 0.025010 1.000000 -s 1 1.00 +s 1 1 0.104917 1.000000 -s 1 1.00 +s 1 1 0.670681 1.000000 -s 1 1.00 +s 1 1 1.004881 1.000000 -p 9 1.00 +p 9 1 0.018300 -0.005906 2 0.031699 -0.031422 3 0.054908 -0.043628 @@ -528,35 +528,35 @@ p 9 1.00 7 0.494330 -0.030830 8 0.856273 0.006185 9 1.483225 -0.008621 -p 1 1.00 +p 1 1 0.081041 1.000000 -p 1 1.00 +p 1 1 0.138470 1.000000 -p 1 1.00 +p 1 1 0.404355 1.000000 -p 1 1.00 +p 1 1 0.806184 1.000000 -d 1 1.00 +d 1 1 0.065574 1.000000 -d 1 1.00 +d 1 1 0.835758 1.000000 -d 1 1.00 +d 1 1 0.161784 1.000000 -d 1 1.00 +d 1 1 0.986350 1.000000 -f 1 1.00 +f 1 1 0.152988 1.000000 -f 1 1.00 +f 1 1 0.420698 1.000000 -f 1 1.00 +f 1 1 0.856748 1.000000 -g 1 1.00 +g 1 1 0.254479 1.000000 -g 1 1.00 +g 1 1 0.457496 1.000000 MAGNESIUM -s 9 1.00 +s 9 1 0.030975 0.165290 2 0.062959 0.506272 3 0.127970 0.333197 @@ -566,15 +566,15 @@ s 9 1.00 7 2.184285 0.048310 8 4.439759 -0.005312 9 9.024217 0.000465 -s 1 1.00 +s 1 1 0.023503 1.000000 -s 1 1.00 +s 1 1 0.061201 1.000000 -s 1 1.00 +s 1 1 0.764885 1.000000 -s 1 1.00 +s 1 1 1.054291 1.000000 -p 9 1.00 +p 9 1 0.047055 1.502038 2 0.083253 -1.433944 3 0.147298 1.318987 @@ -584,37 +584,37 @@ p 9 1.00 7 1.443383 0.086774 8 2.553745 -0.028677 9 4.518286 0.006085 -p 1 1.00 +p 1 1 0.082386 1.000000 -p 1 1.00 +p 1 1 0.177931 1.000000 -p 1 1.00 +p 1 1 0.385451 1.000000 -p 1 1.00 +p 1 1 0.833239 1.000000 -d 1 1.00 +d 1 1 0.102058 1.000000 -d 1 1.00 +d 1 1 0.815528 1.000000 -d 1 1.00 +d 1 1 0.222855 1.000000 -d 1 1.00 +d 1 1 0.973775 1.000000 -f 1 1.00 +f 1 1 0.141691 1.000000 -f 1 1.00 +f 1 1 0.425441 1.000000 -f 1 1.00 +f 1 1 0.847636 1.000000 -g 1 1.00 +g 1 1 0.171110 1.000000 -g 1 1.00 +g 1 1 0.438459 1.000000 -h 1 1.00 +h 1 1 0.360937 1.000000 SODIUM -s 9 1.00 +s 9 1 0.013061 0.200118 2 0.030041 0.467652 3 0.069092 0.227738 @@ -624,15 +624,15 @@ s 9 1.00 7 1.933315 0.003741 8 4.446533 -0.001117 9 10.226816 0.000244 -s 1 1.00 +s 1 1 0.063999 1.000000 -s 1 1.00 +s 1 1 0.414207 1.000000 -s 1 1.00 +s 1 1 0.848058 1.000000 -s 1 1.00 +s 1 1 1.097178 1.000000 -p 9 1.00 +p 9 1 0.002593 -0.002840 2 0.006741 0.005340 3 0.017525 -0.025936 @@ -642,35 +642,35 @@ p 9 1.00 7 0.800738 0.006199 8 2.081847 -0.001026 9 5.412617 0.000168 -p 1 1.00 +p 1 1 0.062027 1.000000 -p 1 1.00 +p 1 1 0.098643 1.000000 -p 1 1.00 +p 1 1 0.404379 1.000000 -p 1 1.00 +p 1 1 0.845826 1.000000 -d 1 1.00 +d 1 1 0.058125 1.000000 -d 1 1.00 +d 1 1 0.824577 1.000000 -d 1 1.00 +d 1 1 0.131674 1.000000 -d 1 1.00 +d 1 1 0.979694 1.000000 -f 1 1.00 +f 1 1 0.112793 1.000000 -f 1 1.00 +f 1 1 0.429471 1.000000 -f 1 1.00 +f 1 1 0.848460 1.000000 -g 1 1.00 +g 1 1 0.285680 1.000000 -g 1 1.00 +g 1 1 0.467702 1.000000 NEON -s 9 1.00 +s 9 1 0.205835 0.057514 2 0.391384 0.215776 3 0.744196 0.374799 @@ -680,15 +680,15 @@ s 9 1.00 7 9.727994 -0.085909 8 18.497256 0.006816 9 35.171534 0.000206 -s 1 1.00 +s 1 1 0.318678 1.000000 -s 1 1.00 +s 1 1 0.830178 1.000000 -s 1 1.00 +s 1 1 1.591904 1.000000 -s 1 1.00 +s 1 1 2.744999 1.000000 -p 9 1.00 +p 9 1 0.121772 0.029943 2 0.238248 0.114200 3 0.466136 0.219618 @@ -698,37 +698,37 @@ p 9 1.00 7 6.830378 0.112176 8 13.363732 0.063317 9 26.146332 0.008057 -p 1 1.00 +p 1 1 0.218226 1.000000 -p 1 1.00 +p 1 1 0.636921 1.000000 -p 1 1.00 +p 1 1 1.888191 1.000000 -p 1 1.00 +p 1 1 3.020108 1.000000 -d 1 1.00 +d 1 1 0.654924 1.000000 -d 1 1.00 +d 1 1 1.931502 1.000000 -d 1 1.00 +d 1 1 5.027566 1.000000 -d 1 1.00 +d 1 1 6.989700 1.000000 -f 1 1.00 +f 1 1 1.314297 1.000000 -f 1 1.00 +f 1 1 4.065928 1.000000 -f 1 1.00 +f 1 1 5.587487 1.000000 -g 1 1.00 +g 1 1 2.070925 1.000000 -g 1 1.00 +g 1 1 6.073107 1.000000 -h 1 1.00 +h 1 1 3.743118 1.000000 NITROGEN -s 9 1.00 +s 9 1 0.098869 0.067266 2 0.211443 0.334290 3 0.452197 0.454257 @@ -738,15 +738,15 @@ s 9 1.00 7 9.459462 0.014437 8 20.230246 0.000359 9 43.264919 -0.000094 -s 1 1.00 +s 1 1 0.115320 1.000000 -s 1 1.00 +s 1 1 0.286632 1.000000 -s 1 1.00 +s 1 1 0.702011 1.000000 -s 1 1.00 +s 1 1 1.532221 1.000000 -p 9 1.00 +p 9 1 0.073234 0.035758 2 0.145867 0.153945 3 0.290535 0.277656 @@ -756,37 +756,37 @@ p 9 1.00 7 4.572652 0.067219 8 9.107739 0.031594 9 18.140657 0.003301 -p 1 1.00 +p 1 1 0.120601 1.000000 -p 1 1.00 +p 1 1 0.322697 1.000000 -p 1 1.00 +p 1 1 0.978538 1.000000 -p 1 1.00 +p 1 1 1.272759 1.000000 -d 1 1.00 +d 1 1 0.305579 1.000000 -d 1 1.00 +d 1 1 0.891436 1.000000 -d 1 1.00 +d 1 1 1.542532 1.000000 -d 1 1.00 +d 1 1 2.798122 1.000000 -f 1 1.00 +f 1 1 0.587676 1.000000 -f 1 1.00 +f 1 1 1.592967 1.000000 -f 1 1.00 +f 1 1 2.443045 1.000000 -g 1 1.00 +g 1 1 1.038637 1.000000 -g 1 1.00 +g 1 1 2.842018 1.000000 -h 1 1.00 +h 1 1 2.272542 1.000000 OXYGEN -s 9 1.00 +s 9 1 0.125346 0.055741 2 0.268022 0.304848 3 0.573098 0.453752 @@ -796,15 +796,15 @@ s 9 1.00 7 11.980245 0.012024 8 25.616801 0.000407 9 54.775216 -0.000076 -s 1 1.00 +s 1 1 0.160664 1.000000 -s 1 1.00 +s 1 1 0.384526 1.000000 -s 1 1.00 +s 1 1 0.935157 1.000000 -s 1 1.00 +s 1 1 1.937532 1.000000 -p 9 1.00 +p 9 1 0.083598 0.044958 2 0.167017 0.150175 3 0.333673 0.255999 @@ -814,37 +814,37 @@ p 9 1.00 7 5.315785 0.082308 8 10.620108 0.039899 9 21.217318 0.004679 -p 1 1.00 +p 1 1 0.130580 1.000000 -p 1 1.00 +p 1 1 0.372674 1.000000 -p 1 1.00 +p 1 1 1.178227 1.000000 -p 1 1.00 +p 1 1 1.589967 1.000000 -d 1 1.00 +d 1 1 0.401152 1.000000 -d 1 1.00 +d 1 1 1.174596 1.000000 -d 1 1.00 +d 1 1 2.823972 1.000000 -d 1 1.00 +d 1 1 4.292433 1.000000 -f 1 1.00 +f 1 1 0.708666 1.000000 -f 1 1.00 +f 1 1 2.006788 1.000000 -f 1 1.00 +f 1 1 3.223721 1.000000 -g 1 1.00 +g 1 1 1.207657 1.000000 -g 1 1.00 +g 1 1 3.584495 1.000000 -h 1 1.00 +h 1 1 2.615818 1.000000 PHOSPHORUS -s 9 1.00 +s 9 1 0.074718 0.140225 2 0.160834 0.506746 3 0.346202 0.499893 @@ -854,15 +854,15 @@ s 9 1.00 7 7.432561 0.001798 8 15.998924 -0.000314 9 34.438408 0.000088 -s 1 1.00 +s 1 1 0.082092 1.000000 -s 1 1.00 +s 1 1 0.195525 1.000000 -s 1 1.00 +s 1 1 0.434767 1.000000 -s 1 1.00 +s 1 1 1.027573 1.000000 -p 9 1.00 +p 9 1 0.050242 0.072095 2 0.102391 0.278735 3 0.208669 0.411034 @@ -872,37 +872,37 @@ p 9 1.00 7 3.599410 -0.005103 8 7.335418 0.000328 9 14.949217 -0.000046 -p 1 1.00 +p 1 1 0.074159 1.000000 -p 1 1.00 +p 1 1 0.189382 1.000000 -p 1 1.00 +p 1 1 0.470798 1.000000 -p 1 1.00 +p 1 1 0.815677 1.000000 -d 1 1.00 +d 1 1 0.167800 1.000000 -d 1 1.00 +d 1 1 0.457307 1.000000 -d 1 1.00 +d 1 1 1.021650 1.000000 -d 1 1.00 +d 1 1 1.598720 1.000000 -f 1 1.00 +f 1 1 0.214751 1.000000 -f 1 1.00 +f 1 1 0.482380 1.000000 -f 1 1.00 +f 1 1 0.984966 1.000000 -g 1 1.00 +g 1 1 0.406484 1.000000 -g 1 1.00 +g 1 1 0.924507 1.000000 -h 1 1.00 +h 1 1 0.831913 1.000000 SILICON -s 9 1.00 +s 9 1 0.059887 0.167492 2 0.130108 0.532550 3 0.282668 0.464290 @@ -912,15 +912,15 @@ s 9 1.00 7 6.297493 -0.000106 8 13.681707 -0.000145 9 29.724387 0.000067 -s 1 1.00 +s 1 1 0.075500 1.000000 -s 1 1.00 +s 1 1 0.196459 1.000000 -s 1 1.00 +s 1 1 0.424036 1.000000 -s 1 1.00 +s 1 1 0.920486 1.000000 -p 9 1.00 +p 9 1 0.036525 0.078761 2 0.076137 0.308331 3 0.158712 0.417773 @@ -930,37 +930,37 @@ p 9 1.00 7 2.996797 0.000744 8 6.246966 -0.000259 9 13.022097 -0.000022 -p 1 1.00 +p 1 1 0.048136 1.000000 -p 1 1.00 +p 1 1 0.115813 1.000000 -p 1 1.00 +p 1 1 0.238594 1.000000 -p 1 1.00 +p 1 1 0.496918 1.000000 -d 1 1.00 +d 1 1 0.127945 1.000000 -d 1 1.00 +d 1 1 0.353096 1.000000 -d 1 1.00 +d 1 1 0.805426 1.000000 -d 1 1.00 +d 1 1 1.247695 1.000000 -f 1 1.00 +f 1 1 0.172876 1.000000 -f 1 1.00 +f 1 1 0.402208 1.000000 -f 1 1.00 +f 1 1 0.833081 1.000000 -g 1 1.00 +g 1 1 0.299885 1.000000 -g 1 1.00 +g 1 1 0.647054 1.000000 -h 1 1.00 +h 1 1 0.557542 1.000000 SULFUR -s 9 1.00 +s 9 1 0.095120 0.140074 2 0.202385 0.490942 3 0.430611 0.515297 @@ -970,15 +970,15 @@ s 9 1.00 7 8.824926 0.007266 8 18.776623 -0.001602 9 39.950656 0.000271 -s 1 1.00 +s 1 1 0.113918 1.000000 -s 1 1.00 +s 1 1 0.282790 1.000000 -s 1 1.00 +s 1 1 0.626702 1.000000 -s 1 1.00 +s 1 1 1.338226 1.000000 -p 9 1.00 +p 9 1 0.057087 0.081938 2 0.115901 0.251826 3 0.235305 0.376344 @@ -988,32 +988,32 @@ p 9 1.00 7 3.997726 -0.017191 8 8.116307 0.002580 9 16.477979 -0.000222 -p 1 1.00 +p 1 1 0.079101 1.000000 -p 1 1.00 +p 1 1 0.210632 1.000000 -p 1 1.00 +p 1 1 0.522537 1.000000 -p 1 1.00 +p 1 1 0.924454 1.000000 -d 1 1.00 +d 1 1 0.186546 1.000000 -d 1 1.00 +d 1 1 0.462328 1.000000 -d 1 1.00 +d 1 1 0.955579 1.000000 -d 1 1.00 +d 1 1 2.334308 1.000000 -f 1 1.00 +f 1 1 0.274343 1.000000 -f 1 1.00 +f 1 1 0.661568 1.000000 -f 1 1.00 +f 1 1 1.389533 1.000000 -g 1 1.00 +g 1 1 0.486698 1.000000 -g 1 1.00 +g 1 1 1.166495 1.000000 -h 1 1.00 +h 1 1 0.839494 1.000000 From a493e1511010cb7a5bc92f0479b06ba93a0cdb20 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 25 Sep 2023 14:26:43 +0200 Subject: [PATCH 313/337] Fix int64 overflow in qp_import_trexio --- scripts/qp_import_trexio.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index e2e8fae2..09f8c166 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -38,6 +38,15 @@ else: QP_ROOT + "/install", QP_ROOT + "/scripts"] + sys.path +def uint64_to_int64(u): + # Check if the most significant bit is set + if u & (1 << 63): + # Calculate the two's complement + result = -int(np.bitwise_not(np.uint64(u))+1) + else: + # The number is already positive + result = u + return result def generate_xyz(l): @@ -473,8 +482,15 @@ def write_ezfio(trexio_filename, filename): beta_s[i] = '1' alpha_s = ''.join(alpha_s)[::-1] beta_s = ''.join(beta_s)[::-1] - alpha = [ int(i,2) for i in qp_bitmasks.string_to_bitmask(alpha_s) ][::-1] - beta = [ int(i,2) for i in qp_bitmasks.string_to_bitmask(beta_s ) ][::-1] + def conv(i): + try: + result = np.int64(i) + except: + result = np.int64(i-2**63-1) + return result + + alpha = [ uint64_to_int64(int(i,2)) for i in qp_bitmasks.string_to_bitmask(alpha_s) ][::-1] + beta = [ uint64_to_int64(int(i,2)) for i in qp_bitmasks.string_to_bitmask(beta_s ) ][::-1] ezfio.set_determinants_bit_kind(8) ezfio.set_determinants_n_int(1+mo_num//64) ezfio.set_determinants_n_det(1) From cfd0c875052c10caf6873e55078a1f4a0b8c9dff Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 26 Sep 2023 13:59:03 +0200 Subject: [PATCH 314/337] Added script to import CHAMP Jastrow --- scripts/import_champ_jastrow.py | 68 +++++++++++++++++++++++++++++++++ scripts/qp_import_trexio.py | 2 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100755 scripts/import_champ_jastrow.py diff --git a/scripts/import_champ_jastrow.py b/scripts/import_champ_jastrow.py new file mode 100755 index 00000000..ca74c21e --- /dev/null +++ b/scripts/import_champ_jastrow.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 + +conv = [ 0, 0, 2 , 6 , 13 , 23 , 37 , 55 , 78 , 106 , 140 ] + + +def import_jastrow(jastrow_filename): + with open(jastrow_filename,'r') as jastrow_file: + lines = [ line.strip() for line in jastrow_file.readlines() ] + lines = [ line for line in lines if line != "" ] + start = 0 + end = len(lines) + for i,line in enumerate(lines): + if line.startswith("jastrow_parameter"): + start = i + elif line.startswith("end"): + end = i + lines = lines[start:end] + type_num = (len(lines)-4)//2 + nord_a,nord_b,nord_c = [ int(i) for i in lines[1].split()[:3] ] + scale_k = float(lines[2].split()[0]) + vec_a = [] + for j in range(type_num): + vec_a += [ float(i) for i in lines[3+j].split()[:nord_a+1] ] + vec_b = [ float(i) for i in lines[3+type_num].split()[:nord_b+1] ] + vec_c = [] + for j in range(type_num): + vec_c += [ float(i) for i in lines[4+type_num+j].split()[:conv[nord_c]] ] + + return { + 'type_num' : type_num, + 'scale_k' : scale_k, + 'nord_a' : nord_a, + 'nord_b' : nord_b, + 'nord_c' : nord_c, + 'vec_a' : vec_a, + 'vec_b' : vec_b, + 'vec_c' : vec_c, + } + + +if __name__ == '__main__': + import sys + from ezfio import ezfio + ezfio.set_file(sys.argv[1]) + jastrow_file = sys.argv[2] + jastrow = import_jastrow(jastrow_file) + print (jastrow) + ezfio.set_jastrow_jast_type("Qmckl") + ezfio.set_jastrow_jast_qmckl_type_nucl_num(jastrow['type_num']) + charges = ezfio.get_nuclei_nucl_charge() + types = {} + k = 0 + for c in charges: + if c not in types: + types[c] = k + k += 1 + type_nucl_vector = [types[c] for c in charges] + ezfio.set_jastrow_jast_qmckl_type_nucl_vector(type_nucl_vector) + ezfio.set_jastrow_jast_qmckl_rescale_ee(jastrow['scale_k']) + ezfio.set_jastrow_jast_qmckl_rescale_en([jastrow['scale_k'] for i in type_nucl_vector]) + ezfio.set_jastrow_jast_qmckl_aord_num(jastrow['nord_a']) + ezfio.set_jastrow_jast_qmckl_bord_num(jastrow['nord_b']) + ezfio.set_jastrow_jast_qmckl_cord_num(jastrow['nord_c']) + ezfio.set_jastrow_jast_qmckl_c_vector_size(len(jastrow['vec_c'])) + ezfio.set_jastrow_jast_qmckl_a_vector(jastrow['vec_a']) + ezfio.set_jastrow_jast_qmckl_b_vector(jastrow['vec_b']) + ezfio.set_jastrow_jast_qmckl_c_vector(jastrow['vec_c']) + diff --git a/scripts/qp_import_trexio.py b/scripts/qp_import_trexio.py index 09f8c166..b3222601 100755 --- a/scripts/qp_import_trexio.py +++ b/scripts/qp_import_trexio.py @@ -142,7 +142,7 @@ def write_ezfio(trexio_filename, filename): try: basis_type = trexio.read_basis_type(trexio_file) - if basis_type.lower()[0] in ["g", "s"]: + if basis_type.lower() in ["gaussian", "slater"]: shell_num = trexio.read_basis_shell_num(trexio_file) prim_num = trexio.read_basis_prim_num(trexio_file) ang_mom = trexio.read_basis_shell_ang_mom(trexio_file) From 145e18f3946ebf585645a642531361bd487f30a8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 26 Sep 2023 18:39:13 +0200 Subject: [PATCH 315/337] Fixed openmp bug --- configure | 2 +- scripts/import_champ_jastrow.py | 3 ++- src/bi_ort_ints/three_body_ijmk.irp.f | 18 +++++++++--------- src/non_h_ints_mu/qmckl.irp.f | 6 ++++++ src/tc_keywords/j1b_pen.irp.f | 2 +- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/configure b/configure index 1535948d..893c7148 100755 --- a/configure +++ b/configure @@ -231,7 +231,7 @@ EOF EOF elif [[ ${PACKAGE} = qmckl ]] ; then - VERSION=0.5.2 + VERSION=0.5.3 execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz diff --git a/scripts/import_champ_jastrow.py b/scripts/import_champ_jastrow.py index ca74c21e..489309b7 100755 --- a/scripts/import_champ_jastrow.py +++ b/scripts/import_champ_jastrow.py @@ -49,12 +49,13 @@ if __name__ == '__main__': ezfio.set_jastrow_jast_qmckl_type_nucl_num(jastrow['type_num']) charges = ezfio.get_nuclei_nucl_charge() types = {} - k = 0 + k = 1 for c in charges: if c not in types: types[c] = k k += 1 type_nucl_vector = [types[c] for c in charges] + print(type_nucl_vector) ezfio.set_jastrow_jast_qmckl_type_nucl_vector(type_nucl_vector) ezfio.set_jastrow_jast_qmckl_rescale_ee(jastrow['scale_k']) ezfio.set_jastrow_jast_qmckl_rescale_en([jastrow['scale_k'] for i in type_nucl_vector]) diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f index 742d5a80..c1f2af60 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -17,10 +17,10 @@ ! ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! - ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki - ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm + ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki + ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm ! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki - ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm + ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! END_DOC @@ -74,8 +74,8 @@ !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2, tmp_2d, tmp1, tmp2) & - !$OMP SHARED (mo_num, n_points_final_grid, i, k, & + !$OMP PRIVATE (k, i, j, m, n, ipoint, tmp_loc_1, tmp_loc_2, tmp_2d, tmp1, tmp2) & + !$OMP SHARED (mo_num, n_points_final_grid, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP tmp_aux_1, tmp_aux_2, & @@ -125,17 +125,17 @@ do n = 1, mo_num do ipoint = 1, n_points_final_grid - + tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n) tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i) - + tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2 tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2 tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2 tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) & + int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) & + int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n) - + tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n) tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n) tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n) @@ -225,7 +225,7 @@ print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0 call print_memory_usage() -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f index 128c83c6..b9802371 100644 --- a/src/non_h_ints_mu/qmckl.irp.f +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -1,13 +1,19 @@ BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] use qmckl + use iso_c_binding implicit none BEGIN_DOC ! Context for the QMCKL library END_DOC integer(qmckl_exit_code) :: rc + logical(c_bool) :: c_true = .True. qmckl_ctx_jastrow = qmckl_context_create() + rc = qmckl_set_jastrow_champ_spin_independent(qmckl_ctx_jastrow, c_true) + rc = qmckl_check(qmckl_ctx_jastrow, rc) + if (rc /= QMCKL_SUCCESS) stop -1 + rc = qmckl_set_nucleus_num(qmckl_ctx_jastrow, nucl_num*1_8) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f index 2d5e59a9..d509fc7e 100644 --- a/src/tc_keywords/j1b_pen.irp.f +++ b/src/tc_keywords/j1b_pen.irp.f @@ -91,7 +91,7 @@ print *, ' parameters for nuclei jastrow' print *, ' i, Z, j1b_pen, j1b_pen_coef' do i = 1, nucl_num - write(*,"(I4, 2x, 3(E15.7, 2X))"), i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) + write(*,'(I4, 2x, 3(E15.7, 2X))') i, nucl_charge(i), j1b_pen(i), j1b_pen_coef(i) enddo END_PROVIDER From 6ad2dd668f977151eec89a27e1ba588e80ae6af1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 27 Sep 2023 11:21:47 +0200 Subject: [PATCH 316/337] Less memory with QMCkl Jastrow --- src/non_h_ints_mu/jast_deriv.irp.f | 147 +++++++++++++++-------------- 1 file changed, 77 insertions(+), 70 deletions(-) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index 7a4717f7..4137c51c 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -121,8 +121,11 @@ double precision :: f f = 1.d0 / dble(elec_num - 1) - integer*8 :: n_points, k - n_points = n_points_extra_final_grid * n_points_final_grid + integer*8 :: n_points, n_points_max, k + integer :: ipoint_block, ipoint_end + + n_points_max = n_points_extra_final_grid * n_points_final_grid + n_points = 100_8*n_points_extra_final_grid double precision, allocatable :: rij(:,:,:) allocate( rij(3, 2, n_points) ) @@ -131,92 +134,96 @@ integer(qmckl_exit_code) :: rc double precision, allocatable :: gl(:,:,:) + allocate( gl(2,4,n_points) ) - k=0 - do ipoint = 1, n_points_final_grid ! r1 - do jpoint = 1, n_points_extra_final_grid ! r2 - k=k+1 - rij(1:3, 1, k) = final_grid_points (1:3, ipoint) - rij(1:3, 2, k) = final_grid_points_extra(1:3, jpoint) + do ipoint_block = 1, n_points_final_grid, 100 ! r1 + ipoint_end = min(n_points_final_grid, ipoint_block+100) + + k=0 + do ipoint = ipoint_block, ipoint_end + do jpoint = 1, n_points_extra_final_grid ! r2 + k=k+1 + rij(1:3, 1, k) = final_grid_points (1:3, ipoint) + rij(1:3, 2, k) = final_grid_points_extra(1:3, jpoint) + end do enddo - enddo + rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', n_points, rij, n_points*6_8) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in set_electron_coord' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - rc = qmckl_set_electron_coord(qmckl_ctx_jastrow, 'N', n_points, rij, n_points*6_8) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, 'qmckl error in set_electron_coord' - rc = qmckl_check(qmckl_ctx_jastrow, rc) - stop -1 - endif + ! --- + ! e-e term + rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, ' qmckl error in fact_ee_gl' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - ! --- - ! e-e term - - rc = qmckl_get_jastrow_champ_factor_ee_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, ' qmckl error in fact_ee_gl' - rc = qmckl_check(qmckl_ctx_jastrow, rc) - stop -1 - endif - - k=0 - do ipoint = 1, n_points_final_grid ! r1 - do jpoint = 1, n_points_extra_final_grid ! r2 - k=k+1 - grad1_u12_num(jpoint,ipoint,1) = gl(1,1,k) - grad1_u12_num(jpoint,ipoint,2) = gl(1,2,k) - grad1_u12_num(jpoint,ipoint,3) = gl(1,3,k) + k=0 + do ipoint = ipoint_block, ipoint_end + do jpoint = 1, n_points_extra_final_grid ! r2 + k=k+1 + grad1_u12_num(jpoint,ipoint,1) = gl(1,1,k) + grad1_u12_num(jpoint,ipoint,2) = gl(1,2,k) + grad1_u12_num(jpoint,ipoint,3) = gl(1,3,k) + enddo enddo - enddo - ! --- - ! e-e-n term + ! --- + ! e-e-n term -! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) -! if (rc /= QMCKL_SUCCESS) then -! print *, irp_here, 'qmckl error in fact_een_gl' -! rc = qmckl_check(qmckl_ctx_jastrow, rc) -! stop -1 -! endif +! rc = qmckl_get_jastrow_champ_factor_een_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) +! if (rc /= QMCKL_SUCCESS) then +! print *, irp_here, 'qmckl error in fact_een_gl' +! rc = qmckl_check(qmckl_ctx_jastrow, rc) +! stop -1 +! endif ! -! k=0 -! do ipoint = 1, n_points_final_grid ! r1 -! do jpoint = 1, n_points_extra_final_grid ! r2 -! k=k+1 -! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,k) -! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,k) -! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,k) -! enddo -! enddo +! k=0 +! do ipoint = 1, n_points_final_grid ! r1 +! do jpoint = 1, n_points_extra_final_grid ! r2 +! k=k+1 +! grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + gl(1,1,k) +! grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + gl(1,2,k) +! grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + gl(1,3,k) +! enddo +! enddo ! --- ! e-n term - rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) - if (rc /= QMCKL_SUCCESS) then - print *, irp_here, 'qmckl error in fact_en_gl' - rc = qmckl_check(qmckl_ctx_jastrow, rc) - stop -1 - endif + rc = qmckl_get_jastrow_champ_factor_en_gl(qmckl_ctx_jastrow, gl, 8_8*n_points) + if (rc /= QMCKL_SUCCESS) then + print *, irp_here, 'qmckl error in fact_en_gl' + rc = qmckl_check(qmckl_ctx_jastrow, rc) + stop -1 + endif - k=0 - do ipoint = 1, n_points_final_grid ! r1 - do jpoint = 1, n_points_extra_final_grid ! r2 - k = k+1 - grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,k) - grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,k) - grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,k) + k=0 + do ipoint = ipoint_block, ipoint_end ! r1 + do jpoint = 1, n_points_extra_final_grid ! r2 + k = k+1 + grad1_u12_num(jpoint,ipoint,1) = grad1_u12_num(jpoint,ipoint,1) + f * gl(1,1,k) + grad1_u12_num(jpoint,ipoint,2) = grad1_u12_num(jpoint,ipoint,2) + f * gl(1,2,k) + grad1_u12_num(jpoint,ipoint,3) = grad1_u12_num(jpoint,ipoint,3) + f * gl(1,3,k) + + dx = grad1_u12_num(jpoint,ipoint,1) + dy = grad1_u12_num(jpoint,ipoint,2) + dz = grad1_u12_num(jpoint,ipoint,3) + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo enddo - do jpoint = 1, n_points_extra_final_grid ! r2 - dx = grad1_u12_num(jpoint,ipoint,1) - dy = grad1_u12_num(jpoint,ipoint,2) - dz = grad1_u12_num(jpoint,ipoint,3) - grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz - enddo - enddo + enddo !ipoint_block + + deallocate(gl, rij) From 8806aee2bd3498cc806f47be458ae1041e5d899a Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 27 Sep 2023 16:02:08 +0200 Subject: [PATCH 317/337] Update for qmckl-0.5.3 --- src/non_h_ints_mu/qmckl.irp.f | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/non_h_ints_mu/qmckl.irp.f b/src/non_h_ints_mu/qmckl.irp.f index b9802371..1df80457 100644 --- a/src/non_h_ints_mu/qmckl.irp.f +++ b/src/non_h_ints_mu/qmckl.irp.f @@ -6,11 +6,10 @@ BEGIN_PROVIDER [ integer*8, qmckl_ctx_jastrow ] ! Context for the QMCKL library END_DOC integer(qmckl_exit_code) :: rc - logical(c_bool) :: c_true = .True. qmckl_ctx_jastrow = qmckl_context_create() - rc = qmckl_set_jastrow_champ_spin_independent(qmckl_ctx_jastrow, c_true) + rc = qmckl_set_jastrow_champ_spin_independent(qmckl_ctx_jastrow, 1) rc = qmckl_check(qmckl_ctx_jastrow, rc) if (rc /= QMCKL_SUCCESS) stop -1 From 541d7f5ff91b246bd9454d4f14879ca54470e837 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 3 Oct 2023 20:04:34 +0200 Subject: [PATCH 318/337] added attachment orbitals --- src/determinants/density_matrix.irp.f | 98 +++++++++++++++ src/determinants/dipole_moments.irp.f | 16 +-- src/tc_bi_ortho/tc_bi_ortho_prop.irp.f | 15 +++ src/tc_bi_ortho/tc_prop.irp.f | 1 + src/tools/attachement_orb.irp.f | 168 +++++++++++++++++++++++++ 5 files changed, 290 insertions(+), 8 deletions(-) create mode 100644 src/tools/attachement_orb.irp.f diff --git a/src/determinants/density_matrix.irp.f b/src/determinants/density_matrix.irp.f index ce4d96c2..46726df0 100644 --- a/src/determinants/density_matrix.irp.f +++ b/src/determinants/density_matrix.irp.f @@ -493,3 +493,101 @@ subroutine get_occupation_from_dets(istate,occupation) enddo end +BEGIN_PROVIDER [double precision, difference_dm, (mo_num, mo_num, N_states)] + implicit none + BEGIN_DOC +! difference_dm(i,j,istate) = dm(i,j,1) - dm(i,j,istate) + END_DOC + integer :: istate + do istate = 1, N_states + difference_dm(:,:,istate) = one_e_dm_mo_alpha(:,:,1) + one_e_dm_mo_beta(:,:,1) & + - (one_e_dm_mo_alpha(:,:,istate) + one_e_dm_mo_beta(:,:,istate)) + enddo +END_PROVIDER + + BEGIN_PROVIDER [double precision, difference_dm_eigvect, (mo_num, mo_num, N_states) ] +&BEGIN_PROVIDER [double precision, difference_dm_eigval, (mo_num, N_states) ] + implicit none + BEGIN_DOC +! eigenvalues and eigevenctors of the difference_dm + END_DOC + integer :: istate,i + do istate = 2, N_states + call lapack_diag(difference_dm_eigval(1,istate),difference_dm_eigvect(1,1,istate)& + ,difference_dm(1,1,istate),mo_num,mo_num) + print*,'Eigenvalues of difference_dm for state ',istate + do i = 1, mo_num + print*,i,difference_dm_eigval(i,istate) + enddo + enddo +END_PROVIDER + + BEGIN_PROVIDER [ integer , n_attachment, (N_states)] +&BEGIN_PROVIDER [ integer , n_dettachment, (N_states)] +&BEGIN_PROVIDER [ integer , list_attachment, (mo_num,N_states)] +&BEGIN_PROVIDER [ integer , list_dettachment, (mo_num,N_states)] + implicit none + integer :: i,istate + integer :: list_attachment_tmp(mo_num) + n_attachment = 0 + n_dettachment = 0 + do istate = 2, N_states + do i = 1, mo_num + if(difference_dm_eigval(i,istate).lt.0.d0)then ! dettachment_orbitals + n_dettachment(istate) += 1 + list_dettachment(n_dettachment(istate),istate) = i ! they are already sorted + else + n_attachment(istate) += 1 + list_attachment_tmp(n_attachment(istate)) = i ! they are not sorted + endif + enddo + ! sorting the attachment + do i = 0, n_attachment(istate) - 1 + list_attachment(i+1,istate) = list_attachment_tmp(n_attachment(istate) - i) + enddo + enddo + +END_PROVIDER + + BEGIN_PROVIDER [ double precision, attachment_numbers_sorted, (mo_num, N_states)] +&BEGIN_PROVIDER [ double precision, dettachment_numbers_sorted, (mo_num, N_states)] + implicit none + integer :: i,istate + do istate = 2, N_states + print*,'dettachment' + do i = 1, n_dettachment(istate) + dettachment_numbers_sorted(i,istate) = difference_dm_eigval(list_dettachment(i,istate),istate) + print*,i,list_dettachment(i,istate),dettachment_numbers_sorted(i,istate) + enddo + print*,'attachment' + do i = 1, n_attachment(istate) + attachment_numbers_sorted(i,istate) = difference_dm_eigval(list_attachment(i,istate),istate) + print*,i,list_attachment(i,istate),attachment_numbers_sorted(i,istate) + enddo + enddo + END_PROVIDER + + BEGIN_PROVIDER [ double precision, attachment_orbitals, (ao_num, mo_num, N_states)] +&BEGIN_PROVIDER [ double precision, dettachment_orbitals, (ao_num, mo_num, N_states)] + implicit none + integer :: i,j,k,istate + attachment_orbitals = 0.d0 + dettachment_orbitals = 0.d0 + do istate = 2, N_states + do i = 1, n_dettachment(istate) + do j = 1, mo_num + do k = 1, ao_num + dettachment_orbitals(k,list_dettachment(i,istate),istate) += mo_coef(k,j) * difference_dm_eigvect(j,list_dettachment(i,istate),istate) + enddo + enddo + enddo + do i = 1, n_attachment(istate) + do j = 1, mo_num + do k = 1, ao_num + attachment_orbitals(k,i,istate) += mo_coef(k,j) * difference_dm_eigvect(j,list_attachment(i,istate),istate) + enddo + enddo + enddo + enddo + +END_PROVIDER diff --git a/src/determinants/dipole_moments.irp.f b/src/determinants/dipole_moments.irp.f index e445c56b..dae04369 100644 --- a/src/determinants/dipole_moments.irp.f +++ b/src/determinants/dipole_moments.irp.f @@ -26,10 +26,10 @@ enddo enddo -! print*,'electron part for z_dipole = ',z_dipole_moment -! print*,'electron part for y_dipole = ',y_dipole_moment -! print*,'electron part for x_dipole = ',x_dipole_moment -! + print*,'electron part for z_dipole = ',z_dipole_moment + print*,'electron part for y_dipole = ',y_dipole_moment + print*,'electron part for x_dipole = ',x_dipole_moment + nuclei_part_z = 0.d0 nuclei_part_y = 0.d0 nuclei_part_x = 0.d0 @@ -38,10 +38,10 @@ nuclei_part_y += nucl_charge(i) * nucl_coord(i,2) nuclei_part_x += nucl_charge(i) * nucl_coord(i,1) enddo -! print*,'nuclei part for z_dipole = ',nuclei_part_z -! print*,'nuclei part for y_dipole = ',nuclei_part_y -! print*,'nuclei part for x_dipole = ',nuclei_part_x -! + print*,'nuclei part for z_dipole = ',nuclei_part_z + print*,'nuclei part for y_dipole = ',nuclei_part_y + print*,'nuclei part for x_dipole = ',nuclei_part_x + do istate = 1, N_states z_dipole_moment(istate) += nuclei_part_z y_dipole_moment(istate) += nuclei_part_y diff --git a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f index 9168fb3d..a5fe9249 100644 --- a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f @@ -34,4 +34,19 @@ subroutine test do i= 1, 3 print*,tc_bi_ortho_dipole(i,1) enddo + integer, allocatable :: occ(:,:) + integer :: n_occ_ab(2) + allocate(occ(N_int*bit_kind_size,2)) + call bitstring_to_list_ab(ref_bitmask, occ, n_occ_ab, N_int) + integer :: ispin,j,jorb + double precision :: accu + accu = 0.d0 + do ispin=1, 2 + do i = 1, n_occ_ab(ispin) + jorb = occ(i,ispin) + accu += mo_bi_orth_bipole_z(jorb,jorb) + enddo + enddo + print*,'accu = ',accu + end diff --git a/src/tc_bi_ortho/tc_prop.irp.f b/src/tc_bi_ortho/tc_prop.irp.f index a13dc9a2..3375fed6 100644 --- a/src/tc_bi_ortho/tc_prop.irp.f +++ b/src/tc_bi_ortho/tc_prop.irp.f @@ -90,6 +90,7 @@ enddo enddo enddo + print*,'tc_bi_ortho_dipole(3) elec = ',tc_bi_ortho_dipole(3,1) nuclei_part = 0.d0 do m = 1, 3 diff --git a/src/tools/attachement_orb.irp.f b/src/tools/attachement_orb.irp.f new file mode 100644 index 00000000..92a51ca8 --- /dev/null +++ b/src/tools/attachement_orb.irp.f @@ -0,0 +1,168 @@ +program molden_detachment_attachment + implicit none + read_wf=.True. + touch read_wf + call molden_attachment +end + +subroutine molden_attachment + implicit none + BEGIN_DOC + ! Produces a Molden file + END_DOC + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + integer :: i,j,k,l + double precision, parameter :: a0 = 0.529177249d0 + + PROVIDE ezfio_filename + + output=trim(ezfio_filename)//'.attachement.mol' + print*,'output = ',trim(output) + + i_unit_output = getUnitAndOpen(output,'w') + + write(i_unit_output,'(A)') '[Molden Format]' + + write(i_unit_output,'(A)') '[Atoms] Angs' + do i = 1, nucl_num + write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))') & + trim(element_name(int(nucl_charge(i)))), & + i, & + int(nucl_charge(i)), & + nucl_coord(i,1)*a0, nucl_coord(i,2)*a0, nucl_coord(i,3)*a0 + enddo + + write(i_unit_output,'(A)') '[GTO]' + + character*(1) :: character_shell + integer :: i_shell,i_prim,i_ao + integer :: iorder(ao_num) + integer :: nsort(ao_num) + + i_shell = 0 + i_prim = 0 + do i=1,nucl_num + write(i_unit_output,*) i, 0 + do j=1,nucl_num_shell_aos(i) + i_shell +=1 + i_ao = nucl_list_shell_aos(i,j) + character_shell = trim(ao_l_char(i_ao)) + write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00' + do k = 1, ao_prim_num(i_ao) + i_prim +=1 + write(i_unit_output,'(ES20.10,2X,ES20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k) + enddo + l = i_ao + do while ( ao_l(l) == ao_l(i_ao) ) + nsort(l) = i*10000 + j*100 + l += 1 + if (l > ao_num) exit + enddo + enddo + write(i_unit_output,*)'' + enddo + + + do i=1,ao_num + iorder(i) = i + ! p + if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 3 + ! d + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + ! f + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 10 + ! g + else if ((ao_power(i,1) == 4 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 1 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 4 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 2 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 4 )) then + nsort(i) += 3 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 4 + else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 5 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 6 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 7 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 8 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 3 )) then + nsort(i) += 9 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then + nsort(i) += 10 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 11 + else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 12 + else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 13 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then + nsort(i) += 14 + else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then + nsort(i) += 15 + endif + enddo + + call isort(nsort,iorder,ao_num) + write(i_unit_output,'(A)') '[MO]' + integer :: istate + istate = 2 + do i=1,n_dettachment(istate) + write (i_unit_output,*) 'Sym= 1' + write (i_unit_output,*) 'Ene=', dettachment_numbers_sorted(i,istate) + write (i_unit_output,*) 'Spin= Alpha' + write (i_unit_output,*) 'Occup=', dettachment_numbers_sorted(i,istate) + do j=1,ao_num + write(i_unit_output, '(I6,2X,ES20.10)') j, dettachment_orbitals(iorder(j),i,istate) + enddo + enddo + do i=1,n_attachment(istate) + write (i_unit_output,*) 'Sym= 1' + write (i_unit_output,*) 'Ene=', attachment_numbers_sorted(i,istate) + write (i_unit_output,*) 'Spin= Alpha' + write (i_unit_output,*) 'Occup=', attachment_numbers_sorted(i,istate) + do j=1,ao_num + write(i_unit_output, '(I6,2X,ES20.10)') j, attachment_orbitals(iorder(j),i,istate) + enddo + enddo + close(i_unit_output) +end + From aefd81dffe2e57d87ec463d98b0a8c7b76b478c2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 5 Oct 2023 15:05:01 +0200 Subject: [PATCH 319/337] Updated EZFIO --- external/ezfio | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/ezfio b/external/ezfio index dba01c4f..66d3dd5d 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 +Subproject commit 66d3dd5d8e05ca564a0c815d636cb58d213a8828 From 2b62bfc999e0205b62910ed882829d9fa0320871 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 6 Oct 2023 11:28:20 +0200 Subject: [PATCH 320/337] working on casscf_cipsi --- src/casscf_cipsi/EZFIO.cfg | 6 ++ src/casscf_cipsi/README.rst | 36 +++++++++++ src/casscf_cipsi/casscf.irp.f | 83 ++++++++++++++++++------- src/casscf_cipsi/densities.irp.f | 29 +++++++++ src/casscf_cipsi/mcscf_fock.irp.f | 13 +++- src/cipsi/stochastic_cipsi.irp.f | 5 +- src/fci/fci.irp.f | 4 +- src/mo_optimization/cipsi_orb_opt.irp.f | 4 +- 8 files changed, 153 insertions(+), 27 deletions(-) diff --git a/src/casscf_cipsi/EZFIO.cfg b/src/casscf_cipsi/EZFIO.cfg index 2a1f1926..18e0b6b1 100644 --- a/src/casscf_cipsi/EZFIO.cfg +++ b/src/casscf_cipsi/EZFIO.cfg @@ -73,3 +73,9 @@ type: logical doc: If |true|, the pt2_max value in the CIPSI iterations will automatically adapt, otherwise it is fixed at the value given in the EZFIO folder interface: ezfio,provider,ocaml default: True + +[small_active_space] +type: logical +doc: If |true|, the pt2_max value in the CIPSI is set to 10-10 and will not change +interface: ezfio,provider,ocaml +default: False diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst index 08bfd95b..fb60f13f 100644 --- a/src/casscf_cipsi/README.rst +++ b/src/casscf_cipsi/README.rst @@ -3,3 +3,39 @@ casscf ====== |CASSCF| program with the CIPSI algorithm. + +Example of inputs +----------------- + +a) Small active space : standard CASSCF +--------------------------------------- +Let's do O2 (triplet) in aug-cc-pvdz with the following geometry (xyz format, Bohr units) +3 + + O 0.0000000000 0.0000000000 -1.1408000000 + O 0.0000000000 0.0000000000 1.1408000000 + +# Create the ezfio folder +qp create_ezfio -b aug-cc-pvdz O2.xyz -m 3 -a -o O2_avdz + +# Start with an ROHF guess +qp run scf | tee ${EZFIO_FILE}.rohf.out + +# Get the ROHF energy for check +qp get hartree_fock energy # should be -149.4684509 + +# Define the full valence active space: the two 1s are doubly occupied, the other 8 valence orbitals are active +# CASSCF(12e,10orb) +qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" + +# Specify that you want an near exact CASSCF, i.e. the CIPSI selection will stop at pt2_max = 10^-10 +qp set casscf_cipsi small_active_space True +# RUN THE CASSCF +qp run casscf | tee ${EZFIO_FILE}.casscf.out + + +b) Large active space : Exploit the selected CI in the active space +------------------------------------------------------------------- +Let us start from the small active space calculation orbitals and add another shell of + + diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index 02954ebf..68e5c4fb 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -8,17 +8,22 @@ program casscf ! touch no_vvvv_integrals n_det_max_full = 500 touch n_det_max_full - pt2_relative_error = 0.04 + if(small_active_space)then + pt2_relative_error = 0.00001 + else + pt2_relative_error = 0.04 + endif touch pt2_relative_error -! call run_stochastic_cipsi call run end subroutine run implicit none - double precision :: energy_old, energy, pt2_max_before, ept2_before,delta_E + double precision :: energy_old, energy, pt2_max_before,delta_E logical :: converged,state_following_casscf_cipsi_save - integer :: iteration + integer :: iteration,istate + double precision, allocatable :: E_PT2(:), PT2(:), Ev(:), ept2_before(:) + allocate(E_PT2(N_states), PT2(N_states), Ev(N_states), ept2_before(N_states)) converged = .False. energy = 0.d0 @@ -28,13 +33,19 @@ subroutine run state_following_casscf = .True. touch state_following_casscf ept2_before = 0.d0 - if(adaptive_pt2_max)then - pt2_max = 0.005 + if(small_active_space)then + pt2_max = 1.d-10 SOFT_TOUCH pt2_max + else + if(adaptive_pt2_max)then + pt2_max = 0.005 + SOFT_TOUCH pt2_max + endif endif do while (.not.converged) print*,'pt2_max = ',pt2_max - call run_stochastic_cipsi + call run_stochastic_cipsi(Ev,PT2) + E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) energy_old = energy energy = eone+etwo+ecore pt2_max_before = pt2_max @@ -42,15 +53,13 @@ subroutine run call write_time(6) call write_int(6,iteration,'CAS-SCF iteration = ') call write_double(6,energy,'CAS-SCF energy = ') - if(n_states == 1)then - double precision :: E_PT2, PT2 - call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) - call ezfio_get_casscf_cipsi_energy(PT2) - PT2 -= E_PT2 - call write_double(6,E_PT2,'E + PT2 energy = ') - call write_double(6,PT2,' PT2 = ') +! if(n_states == 1)then +! call ezfio_get_casscf_cipsi_energy_pt2(E_PT2) +! call ezfio_get_casscf_cipsi_energy(PT2) + call write_double(6,E_PT2(1:N_states),'E + PT2 energy = ') + call write_double(6,PT2(1:N_states),' PT2 = ') call write_double(6,pt2_max,' PT2_MAX = ') - endif +! endif print*,'' call write_double(6,norm_grad_vec2,'Norm of gradients = ') @@ -65,15 +74,20 @@ subroutine run else if (criterion_casscf == "gradients")then converged = norm_grad_vec2 < thresh_scf else if (criterion_casscf == "e_pt2")then - delta_E = dabs(E_PT2 - ept2_before) + delta_E = 0.d0 + do istate = 1, N_states + delta_E += dabs(E_PT2(istate) - ept2_before(istate)) + enddo converged = dabs(delta_E) < thresh_casscf endif ept2_before = E_PT2 - if(adaptive_pt2_max)then - pt2_max = dabs(energy_improvement / (pt2_relative_error)) - pt2_max = min(pt2_max, pt2_max_before) - if(n_act_orb.ge.n_big_act_orb)then - pt2_max = max(pt2_max,pt2_min_casscf) + if(.not.small_active_space)then + if(adaptive_pt2_max)then + pt2_max = dabs(energy_improvement / (pt2_relative_error)) + pt2_max = min(pt2_max, pt2_max_before) + if(n_act_orb.ge.n_big_act_orb)then + pt2_max = max(pt2_max,pt2_min_casscf) + endif endif endif print*,'' @@ -94,8 +108,10 @@ subroutine run read_wf = .True. call clear_mo_map SOFT_TOUCH mo_coef N_det psi_det psi_coef - if(adaptive_pt2_max)then - SOFT_TOUCH pt2_max + if(.not.small_active_space)then + if(adaptive_pt2_max)then + SOFT_TOUCH pt2_max + endif endif if(iteration .gt. 3)then state_following_casscf = state_following_casscf_cipsi_save @@ -104,6 +120,27 @@ subroutine run endif enddo + integer :: i +! print*,'Converged CASSCF ' +! print*,'--------------------------' +! write(6,*) ' occupation numbers of orbitals ' +! do i=1,mo_num +! write(6,*) i,occnum(i) +! end do +! +! write(6,*) +! write(6,*) ' the diagonal of the inactive effective Fock matrix ' +! write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) +! write(6,*) + print*,'Fock ROHF ' + do i = 1, ao_num + write(33,*)fock_matrix_ao_alpha(i,1:ao_num) + enddo + print*,'Fock MCSCF' + do i = 1, ao_num + write(34,*)mcscf_fock_alpha(i,1:ao_num) + enddo + end diff --git a/src/casscf_cipsi/densities.irp.f b/src/casscf_cipsi/densities.irp.f index bebcf5d7..54ff86e1 100644 --- a/src/casscf_cipsi/densities.irp.f +++ b/src/casscf_cipsi/densities.irp.f @@ -17,6 +17,35 @@ BEGIN_PROVIDER [real*8, D0tu, (n_act_orb,n_act_orb) ] END_PROVIDER + BEGIN_PROVIDER [double precision, D0tu_alpha_ao, (ao_num, ao_num)] +&BEGIN_PROVIDER [double precision, D0tu_beta_ao, (ao_num, ao_num)] + implicit none + integer :: i,ii,j,u,t,uu,tt + double precision, allocatable :: D0_tmp_alpha(:,:),D0_tmp_beta(:,:) + allocate(D0_tmp_alpha(mo_num, mo_num),D0_tmp_beta(mo_num, mo_num)) + D0_tmp_beta = 0.d0 + D0_tmp_alpha = 0.d0 + do i = 1, n_core_inact_orb + ii = list_core_inact(i) + D0_tmp_alpha(ii,ii) = 1.d0 + D0_tmp_beta(ii,ii) = 1.d0 + enddo + print*,'Diagonal elements of the 1RDM in the active space' + do u=1,n_act_orb + uu = list_act(u) + print*,uu,one_e_dm_mo_alpha_average(uu,uu),one_e_dm_mo_beta_average(uu,uu) + do t=1,n_act_orb + tt = list_act(t) + D0_tmp_alpha(tt,uu) = one_e_dm_mo_alpha_average(tt,uu) + D0_tmp_beta(tt,uu) = one_e_dm_mo_beta_average(tt,uu) + enddo + enddo + + call mo_to_ao_no_overlap(D0_tmp_alpha,mo_num,D0tu_alpha_ao,ao_num) + call mo_to_ao_no_overlap(D0_tmp_beta,mo_num,D0tu_beta_ao,ao_num) + +END_PROVIDER + BEGIN_PROVIDER [real*8, P0tuvx, (n_act_orb,n_act_orb,n_act_orb,n_act_orb) ] BEGIN_DOC ! The second-order density matrix in the basis of the starting MOs ONLY IN THE RANGE OF ACTIVE MOS diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index e4568405..519dfff7 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -77,4 +77,15 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] END_PROVIDER - + BEGIN_PROVIDER [ double precision, mcscf_fock_alpha, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, mcscf_fock_beta, (ao_num, ao_num)] + implicit none + BEGIN_DOC + ! mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities + END_DOC + SCF_density_matrix_ao_alpha = D0tu_alpha_ao + SCF_density_matrix_ao_beta = D0tu_beta_ao + soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta + mcscf_fock_beta = fock_matrix_ao_beta + mcscf_fock_alpha = fock_matrix_ao_alpha +END_PROVIDER diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 339f7084..3a895280 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -1,10 +1,11 @@ -subroutine run_stochastic_cipsi +subroutine run_stochastic_cipsi(Ev,PT2) use selection_types implicit none BEGIN_DOC ! Selected Full Configuration Interaction with Stochastic selection and PT2. END_DOC integer :: i,j,k + double precision, intent(out) :: Ev(N_states), PT2(N_states) double precision, allocatable :: zeros(:) integer :: to_select type(pt2_type) :: pt2_data, pt2_data_err @@ -139,6 +140,8 @@ subroutine run_stochastic_cipsi call print_mol_properties() call write_cipsi_json(pt2_data,pt2_data_err) endif + Ev(1:N_states) = psi_energy_with_nucl_rep(1:N_states) + PT2(1:N_states) = pt2_data % pt2(1:N_states) call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index bb2a93f8..9de48a01 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -41,8 +41,10 @@ program fci write(json_unit,json_array_open_fmt) 'fci' + double precision, allocatable :: Ev(:),PT2(:) + allocate(Ev(N_states), PT2(N_state)) if (do_pt2) then - call run_stochastic_cipsi + call run_stochastic_cipsi(Ev,PT2) else call run_cipsi endif diff --git a/src/mo_optimization/cipsi_orb_opt.irp.f b/src/mo_optimization/cipsi_orb_opt.irp.f index ae3aa1bf..7e3a79eb 100644 --- a/src/mo_optimization/cipsi_orb_opt.irp.f +++ b/src/mo_optimization/cipsi_orb_opt.irp.f @@ -11,11 +11,13 @@ subroutine run_optimization implicit none double precision :: e_cipsi, e_opt, delta_e + double precision, allocatable :: Ev(:),PT2(:) integer :: nb_iter,i logical :: not_converged character (len=100) :: filename PROVIDE psi_det psi_coef mo_two_e_integrals_in_map ao_pseudo_integrals + allocate(Ev(N_states),PT2(N_states)) not_converged = .True. nb_iter = 0 @@ -38,7 +40,7 @@ subroutine run_optimization print*,'' print*,'********** cipsi step **********' ! cispi calculation - call run_stochastic_cipsi + call run_stochastic_cipsi(Ev,PT2) ! State average energy after the cipsi step call state_average_energy(e_cipsi) From 106a2eafff25df0004f49395761fb107aad80d20 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 6 Oct 2023 11:46:24 +0200 Subject: [PATCH 321/337] Update qmckl --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 893c7148..c328c4f0 100755 --- a/configure +++ b/configure @@ -231,7 +231,7 @@ EOF EOF elif [[ ${PACKAGE} = qmckl ]] ; then - VERSION=0.5.3 + VERSION=0.5.4 execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz From 1739ec4f4ad2a5ee438e5293100f3b3f2ec8c9d4 Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 6 Oct 2023 14:50:17 +0200 Subject: [PATCH 322/337] added some mcscf fock printing --- src/casscf_cipsi/README.rst | 2 + src/casscf_cipsi/casscf.irp.f | 22 +++--- src/casscf_cipsi/mcscf_fock.irp.f | 114 ++++++++++++++++++++++++++++-- 3 files changed, 121 insertions(+), 17 deletions(-) diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst index fb60f13f..155d90da 100644 --- a/src/casscf_cipsi/README.rst +++ b/src/casscf_cipsi/README.rst @@ -39,3 +39,5 @@ b) Large active space : Exploit the selected CI in the active space Let us start from the small active space calculation orbitals and add another shell of + +TODO : print FOCK MCSCF NEW in the MO BASIS AT THE END OF THE CASSCF diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index 68e5c4fb..06a2bc52 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -121,24 +121,22 @@ subroutine run enddo integer :: i -! print*,'Converged CASSCF ' -! print*,'--------------------------' -! write(6,*) ' occupation numbers of orbitals ' -! do i=1,mo_num -! write(6,*) i,occnum(i) -! end do + print*,'Converged CASSCF ' + print*,'--------------------------' + write(6,*) ' occupation numbers of orbitals ' + do i=1,mo_num + write(6,*) i,occnum(i) + end do + print*,'--------------' ! ! write(6,*) ! write(6,*) ' the diagonal of the inactive effective Fock matrix ' ! write(6,'(5(i3,F12.5))') (i,Fipq(i,i),i=1,mo_num) ! write(6,*) - print*,'Fock ROHF ' - do i = 1, ao_num - write(33,*)fock_matrix_ao_alpha(i,1:ao_num) - enddo print*,'Fock MCSCF' - do i = 1, ao_num - write(34,*)mcscf_fock_alpha(i,1:ao_num) + do i = 1, mo_num + write(*,*)i,mcscf_fock_diag_mo(i) +! write(*,*)mcscf_fock_alpha_mo(i,i) enddo diff --git a/src/casscf_cipsi/mcscf_fock.irp.f b/src/casscf_cipsi/mcscf_fock.irp.f index 519dfff7..0f4b7a99 100644 --- a/src/casscf_cipsi/mcscf_fock.irp.f +++ b/src/casscf_cipsi/mcscf_fock.irp.f @@ -77,15 +77,119 @@ BEGIN_PROVIDER [real*8, Fapq, (mo_num,mo_num) ] END_PROVIDER - BEGIN_PROVIDER [ double precision, mcscf_fock_alpha, (ao_num, ao_num)] -&BEGIN_PROVIDER [ double precision, mcscf_fock_beta, (ao_num, ao_num)] + BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_ao, (ao_num, ao_num)] +&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_ao, (ao_num, ao_num)] implicit none BEGIN_DOC - ! mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities + ! mcscf_fock_alpha_ao are set to usual Fock like operator but computed with the MCSCF densities on the AO basis END_DOC SCF_density_matrix_ao_alpha = D0tu_alpha_ao SCF_density_matrix_ao_beta = D0tu_beta_ao soft_touch SCF_density_matrix_ao_alpha SCF_density_matrix_ao_beta - mcscf_fock_beta = fock_matrix_ao_beta - mcscf_fock_alpha = fock_matrix_ao_alpha + mcscf_fock_beta_ao = fock_matrix_ao_beta + mcscf_fock_alpha_ao = fock_matrix_ao_alpha +END_PROVIDER + + + BEGIN_PROVIDER [ double precision, mcscf_fock_alpha_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, mcscf_fock_beta_mo, (mo_num, mo_num)] + implicit none + BEGIN_DOC + ! Mo_mcscf_fock_alpha are set to usual Fock like operator but computed with the MCSCF densities on the MO basis + END_DOC + + call ao_to_mo(mcscf_fock_alpha_ao,ao_num,mcscf_fock_alpha_mo,mo_num) + call ao_to_mo(mcscf_fock_beta_ao,ao_num,mcscf_fock_beta_mo,mo_num) + +END_PROVIDER + + BEGIN_PROVIDER [ double precision, mcscf_fock_mo, (mo_num,mo_num) ] +&BEGIN_PROVIDER [ double precision, mcscf_fock_diag_mo, (mo_num)] + implicit none + BEGIN_DOC + ! MCSF Fock matrix on the MO basis. + ! For open shells, the ROHF Fock Matrix is :: + ! + ! | Rcc | F^b | Fcv | + ! |-----------------------| + ! | F^b | Roo | F^a | + ! |-----------------------| + ! | Fcv | F^a | Rvv | + ! + ! C: Core, O: Open, V: Virtual + ! + ! Rcc = Acc Fcc^a + Bcc Fcc^b + ! Roo = Aoo Foo^a + Boo Foo^b + ! Rvv = Avv Fvv^a + Bvv Fvv^b + ! Fcv = (F^a + F^b)/2 + ! + ! F^a: Fock matrix alpha (MO), F^b: Fock matrix beta (MO) + ! A,B: Coupling parameters + ! + ! J. Chem. Phys. 133, 141102 (2010), https://doi.org/10.1063/1.3503173 + ! Coupling parameters from J. Chem. Phys. 125, 204110 (2006); https://doi.org/10.1063/1.2393223. + ! cc oo vv + ! A -0.5 0.5 1.5 + ! B 1.5 0.5 -0.5 + ! + END_DOC + integer :: i,j,n + if (elec_alpha_num == elec_beta_num) then + mcscf_fock_mo = mcscf_fock_alpha_mo + else + ! Core + do j = 1, elec_beta_num + ! Core + do i = 1, elec_beta_num + mcscf_fock_mo(i,j) = - 0.5d0 * mcscf_fock_alpha_mo(i,j) & + + 1.5d0 * mcscf_fock_beta_mo(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + mcscf_fock_mo(i,j) = mcscf_fock_beta_mo(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + mcscf_fock_mo(i,j) = 0.5d0 * mcscf_fock_alpha_mo(i,j) & + + 0.5d0 * mcscf_fock_beta_mo(i,j) + enddo + enddo + ! Open + do j = elec_beta_num+1, elec_alpha_num + ! Core + do i = 1, elec_beta_num + mcscf_fock_mo(i,j) = mcscf_fock_beta_mo(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + mcscf_fock_mo(i,j) = 0.5d0 * mcscf_fock_alpha_mo(i,j) & + + 0.5d0 * mcscf_fock_beta_mo(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + mcscf_fock_mo(i,j) = mcscf_fock_alpha_mo(i,j) + enddo + enddo + ! Virtual + do j = elec_alpha_num+1, mo_num + ! Core + do i = 1, elec_beta_num + mcscf_fock_mo(i,j) = 0.5d0 * mcscf_fock_alpha_mo(i,j) & + + 0.5d0 * mcscf_fock_beta_mo(i,j) + enddo + ! Open + do i = elec_beta_num+1, elec_alpha_num + mcscf_fock_mo(i,j) = mcscf_fock_alpha_mo(i,j) + enddo + ! Virtual + do i = elec_alpha_num+1, mo_num + mcscf_fock_mo(i,j) = 1.5d0 * mcscf_fock_alpha_mo(i,j) & + - 0.5d0 * mcscf_fock_beta_mo(i,j) + enddo + enddo + endif + + do i = 1, mo_num + mcscf_fock_diag_mo(i) = mcscf_fock_mo(i,i) + enddo END_PROVIDER From d9b2298d9a0d380b2ea26b1702d7d0d805b8c06a Mon Sep 17 00:00:00 2001 From: eginer Date: Fri, 6 Oct 2023 15:36:38 +0200 Subject: [PATCH 323/337] improved casscf and added README.rst --- src/casscf_cipsi/README.rst | 12 ++++++++---- src/casscf_cipsi/casscf.irp.f | 2 ++ src/cipsi/stochastic_cipsi.irp.f | 4 ++-- src/two_body_rdm/state_av_act_2rdm.irp.f | 2 +- src/two_body_rdm/test_2_rdm.irp.f | 2 +- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/casscf_cipsi/README.rst b/src/casscf_cipsi/README.rst index 155d90da..f84cde75 100644 --- a/src/casscf_cipsi/README.rst +++ b/src/casscf_cipsi/README.rst @@ -32,12 +32,16 @@ qp set_mo_class -c "[1-2]" -a "[3-10]" -v "[11-46]" qp set casscf_cipsi small_active_space True # RUN THE CASSCF qp run casscf | tee ${EZFIO_FILE}.casscf.out +# you should find around -149.7243542 b) Large active space : Exploit the selected CI in the active space ------------------------------------------------------------------- -Let us start from the small active space calculation orbitals and add another shell of +#Let us start from the small active space calculation orbitals and add another 10 virtuals: CASSCF(12e,20orb) +qp set_mo_class -c "[1-2]" -a "[3-20]" -v "[21-46]" +# As this active space is larger, you unset the small_active_space feature +qp set casscf_cipsi small_active_space False +# As it is a large active space, the energy convergence thereshold is set to be 0.0001 +qp run casscf | tee ${EZFIO_FILE}.casscf_large.out +# you should find around -149.9046 - - -TODO : print FOCK MCSCF NEW in the MO BASIS AT THE END OF THE CASSCF diff --git a/src/casscf_cipsi/casscf.irp.f b/src/casscf_cipsi/casscf.irp.f index 06a2bc52..ba4d8eea 100644 --- a/src/casscf_cipsi/casscf.irp.f +++ b/src/casscf_cipsi/casscf.irp.f @@ -11,6 +11,7 @@ program casscf if(small_active_space)then pt2_relative_error = 0.00001 else + thresh_scf = 1.d-4 pt2_relative_error = 0.04 endif touch pt2_relative_error @@ -45,6 +46,7 @@ subroutine run do while (.not.converged) print*,'pt2_max = ',pt2_max call run_stochastic_cipsi(Ev,PT2) + print*,'Ev,PT2',Ev(1),PT2(1) E_PT2(1:N_states) = Ev(1:N_states) + PT2(1:N_states) energy_old = energy energy = eone+etwo+ecore diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 3a895280..289040f0 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -80,12 +80,14 @@ subroutine run_stochastic_cipsi(Ev,PT2) to_select = max(N_states_diag, to_select) + Ev(1:N_states) = psi_energy_with_nucl_rep(1:N_states) call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) call ZMQ_pt2(psi_energy_with_nucl_rep,pt2_data,pt2_data_err,relative_error,to_select) ! Stochastic PT2 and selection + PT2(1:N_states) = pt2_data % pt2(1:N_states) correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref) / & (psi_energy_with_nucl_rep(1) + pt2_data % rpt2(1) - hf_energy_ref) correlation_energy_ratio = min(1.d0,correlation_energy_ratio) @@ -140,8 +142,6 @@ subroutine run_stochastic_cipsi(Ev,PT2) call print_mol_properties() call write_cipsi_json(pt2_data,pt2_data_err) endif - Ev(1:N_states) = psi_energy_with_nucl_rep(1:N_states) - PT2(1:N_states) = pt2_data % pt2(1:N_states) call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) diff --git a/src/two_body_rdm/state_av_act_2rdm.irp.f b/src/two_body_rdm/state_av_act_2rdm.irp.f index ea636212..e1bd6439 100644 --- a/src/two_body_rdm/state_av_act_2rdm.irp.f +++ b/src/two_body_rdm/state_av_act_2rdm.irp.f @@ -123,7 +123,7 @@ state_av_act_2_rdm_spin_trace_mo = state_av_act_2_rdm_ab_mo & + state_av_act_2_rdm_aa_mo & + state_av_act_2_rdm_bb_mo - +! ! call orb_range_2_rdm_state_av_openmp(state_av_act_2_rdm_spin_trace_mo,n_act_orb,n_act_orb,list_act,state_weights,ispin,psi_coef,size(psi_coef,2),size(psi_coef,1)) call wall_time(wall_2) diff --git a/src/two_body_rdm/test_2_rdm.irp.f b/src/two_body_rdm/test_2_rdm.irp.f index 4eb8f9f0..123261d8 100644 --- a/src/two_body_rdm/test_2_rdm.irp.f +++ b/src/two_body_rdm/test_2_rdm.irp.f @@ -2,7 +2,7 @@ program test_2_rdm implicit none read_wf = .True. touch read_wf -! call routine_active_only + call routine_active_only call routine_full_mos end From 999839b83938ff9797db92b777dcc6de88fbfda1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 11 Oct 2023 09:42:10 +0200 Subject: [PATCH 324/337] Fixed reversed order in print of extrapolation --- external/ezfio | 2 +- src/iterations/print_extrapolation.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/ezfio b/external/ezfio index 66d3dd5d..d5805497 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit 66d3dd5d8e05ca564a0c815d636cb58d213a8828 +Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 diff --git a/src/iterations/print_extrapolation.irp.f b/src/iterations/print_extrapolation.irp.f index a7f85693..24c9845f 100644 --- a/src/iterations/print_extrapolation.irp.f +++ b/src/iterations/print_extrapolation.irp.f @@ -37,7 +37,7 @@ subroutine print_extrapolated_energy write(*,*) 'minimum PT2 ', 'Extrapolated energy ', ' Excitation (a.u) ', ' Excitation (eV) ' write(*,*) '=========== ', '=================== ', '=================== ', '===================' do k=2,N_iter_p - write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,k), extrapolated_energy(k,i), & + write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter_p+1-k), extrapolated_energy(k,i), & extrapolated_energy(k,i) - extrapolated_energy(k,1), & (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0 enddo From bce700526d73f5b77e837bc8f34a453c3a504b92 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 11 Oct 2023 10:24:55 +0200 Subject: [PATCH 325/337] Better behavior when DSYGV Failed --- .../diagonalization_hs2_dressed.irp.f | 53 ++++++++++--------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 7b559925..1ead9d78 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -286,7 +286,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ ! Small h(N_st_diag*itermax,N_st_diag*itermax), & - h_p(N_st_diag*itermax,N_st_diag*itermax), & +! h_p(N_st_diag*itermax,N_st_diag*itermax), & y(N_st_diag*itermax,N_st_diag*itermax), & s_(N_st_diag*itermax,N_st_diag*itermax), & s_tmp(N_st_diag*itermax,N_st_diag*itermax), & @@ -340,7 +340,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ exit endif - do iter=1,itermax-1 + iter = 0 + do while (iter < itermax-1) + iter += 1 +! do iter=1,itermax-1 shift = N_st_diag*(iter-1) shift2 = N_st_diag*iter @@ -430,30 +433,30 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ call dgemm('T','N', shift2, shift2, sze, & 1.d0, U, size(U,1), W, size(W,1), & - 0.d0, h, size(h_p,1)) + 0.d0, h, size(h,1)) call dgemm('T','N', shift2, shift2, sze, & 1.d0, U, size(U,1), U, size(U,1), & 0.d0, s_tmp, size(s_tmp,1)) - ! Penalty method - ! -------------- - - if (s2_eig) then - h_p = s_ - do k=1,shift2 - h_p(k,k) = h_p(k,k) - expected_s2 - enddo - if (only_expected_s2) then - alpha = 0.1d0 - h_p = h + alpha*h_p - else - alpha = 0.0001d0 - h_p = h + alpha*h_p - endif - else - h_p = h - alpha = 0.d0 - endif +! ! Penalty method +! ! -------------- +! +! if (s2_eig) then +! h_p = s_ +! do k=1,shift2 +! h_p(k,k) = h_p(k,k) - expected_s2 +! enddo +! if (only_expected_s2) then +! alpha = 0.1d0 +! h_p = h + alpha*h_p +! else +! alpha = 0.0001d0 +! h_p = h + alpha*h_p +! endif +! else +! h_p = h +! alpha = 0.d0 +! endif ! Diagonalize h_p ! --------------- @@ -473,8 +476,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ call dsygv(1,'V','U',shift2,y,size(y,1), & s_tmp,size(s_tmp,1), lambda, work,lwork,info) deallocate(work) - if (info /= 0) then - stop 'DSYGV Diagonalization failed' + if (info > 0) then + ! Numerical errors propagate. We need to reduce the number of iterations + itermax = iter-1 + exit endif ! Compute Energy for each eigenvector From a64d02ab427a2b886e19390a33c4fa395f69ed9c Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 11 Oct 2023 15:45:51 +0200 Subject: [PATCH 326/337] trying to work on natorb --- bin/qp_reset | 2 + external/ezfio | 2 +- src/non_hermit_dav/biorthog.irp.f | 6 +- .../lapack_diag_non_hermit.irp.f | 90 ++++++++++++++----- src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f | 8 +- src/tc_scf/routines_rotates.irp.f | 1 + 6 files changed, 81 insertions(+), 28 deletions(-) diff --git a/bin/qp_reset b/bin/qp_reset index d94ab24c..b144c4ce 100755 --- a/bin/qp_reset +++ b/bin/qp_reset @@ -97,6 +97,8 @@ if [[ $dets -eq 1 ]] ; then rm --force -- ${ezfio}/determinants/psi_{det,coef}.gz rm --force -- ${ezfio}/determinants/n_det_qp_edit rm --force -- ${ezfio}/determinants/psi_{det,coef}_qp_edit.gz + rm --force -- ${ezfio}/tc_bi_ortho/psi_{l,r}_coef_bi_ortho.gz + fi if [[ $mos -eq 1 ]] ; then diff --git a/external/ezfio b/external/ezfio index d5805497..dba01c4f 160000 --- a/external/ezfio +++ b/external/ezfio @@ -1 +1 @@ -Subproject commit d5805497fa0ef30e70e055cde1ecec2963303e93 +Subproject commit dba01c4fe0ff7b84c5ecfb1c7c77ec68781311b3 diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f index 78fddf54..da33f75a 100644 --- a/src/non_hermit_dav/biorthog.irp.f +++ b/src/non_hermit_dav/biorthog.irp.f @@ -331,7 +331,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei !thr = 100d0 thr = Im_thresh_tcscf do i = 1, n - !print*, 'Re(i) + Im(i)', WR(i), WI(i) + print*, 'Re(i) + Im(i)', WR(i), WI(i) if(dabs(WI(i)) .lt. thr) then n_good += 1 else @@ -405,7 +405,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then - !print *, ' lapack vectors are normalized and bi-orthogonalized' + print *, ' lapack vectors are normalized and bi-orthogonalized' deallocate(S) return @@ -422,7 +422,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei else - !print *, ' lapack vectors are not normalized neither bi-orthogonalized' + print *, ' lapack vectors are not normalized neither bi-orthogonalized' ! --- diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f index 0d652af4..6e5719c1 100644 --- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -1857,7 +1857,7 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ integer :: i, j double precision, allocatable :: SS(:,:) - !print *, ' check bi-orthogonality' + print *, ' check bi-orthogonality' ! --- @@ -1865,10 +1865,10 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ , Vl, size(Vl, 1), Vr, size(Vr, 1) & , 0.d0, S, size(S, 1) ) - !print *, ' overlap matrix:' - !do i = 1, m - ! write(*,'(1000(F16.10,X))') S(i,:) - !enddo + print *, ' overlap matrix:' + do i = 1, m + write(*,'(1000(F16.10,X))') S(i,:) + enddo accu_d = 0.d0 accu_nd = 0.d0 @@ -1883,8 +1883,8 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ enddo accu_nd = dsqrt(accu_nd) / dble(m) - !print *, ' accu_nd = ', accu_nd - !print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) + print *, ' accu_nd = ', accu_nd + print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) ! --- @@ -1987,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) enddo enddo - !do i = 1, n - ! if(deg_num(i) .gt. 1) then - ! print *, ' degen on', i, deg_num(i), e0(i) - ! endif - !enddo + do i = 1, n + if(deg_num(i) .gt. 1) then + print *, ' degen on', i, deg_num(i), e0(i) + endif + enddo ! --- @@ -2010,7 +2010,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) ! --- - call impose_orthog_svd(n, m, L) +! call impose_orthog_svd(n, m, L) call impose_orthog_svd(n, m, R) !call impose_orthog_GramSchmidt(n, m, L) !call impose_orthog_GramSchmidt(n, m, R) @@ -2030,7 +2030,8 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) !call bi_ortho_s_inv_half(m, L, R, S_inv_half) !deallocate(S, S_inv_half) - call impose_biorthog_svd(n, m, L, R) +! call impose_biorthog_svd(n, m, L, R) +! call impose_biorthog_inverse(n, m, L, R) !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) @@ -2045,6 +2046,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) endif enddo + call impose_biorthog_inverse(n, n, L0, R0) end subroutine impose_biorthog_degen_eigvec @@ -2420,10 +2422,10 @@ subroutine impose_biorthog_svd(n, m, L, R) , L, size(L, 1), R, size(R, 1) & , 0.d0, S, size(S, 1) ) - !print *, ' overlap bef SVD: ' - !do i = 1, m - ! write(*, '(1000(F16.10,X))') S(i,:) - !enddo + print *, ' overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo ! --- @@ -2495,10 +2497,10 @@ subroutine impose_biorthog_svd(n, m, L, R) , L, size(L, 1), R, size(R, 1) & , 0.d0, S, size(S, 1) ) - !print *, ' overlap aft SVD: ' - !do i = 1, m - ! write(*, '(1000(F16.10,X))') S(i,:) - !enddo + print *, ' overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo deallocate(S) @@ -2506,6 +2508,50 @@ subroutine impose_biorthog_svd(n, m, L, R) end subroutine impose_biorthog_svd +subroutine impose_biorthog_inverse(n, m, L, R) + + implicit none + + integer, intent(in) :: n, m + double precision, intent(inout) :: L(n,m) + double precision, intent(in) :: R(n,m) + double precision, allocatable :: Lt(:,:),S(:,:) + integer :: i,j + allocate(Lt(m,n)) + allocate(S(m,m)) + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + call get_pseudo_inverse(R,n,n,m,Lt,m,1.d-6) + do i = 1, m + do j = 1, n + L(j,i) = Lt(i,j) + enddo + enddo + ! --- + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + + print *, ' overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo + + deallocate(S,Lt) + + +end subroutine impose_biorthog_svd + + ! --- subroutine impose_weighted_biorthog_qr(m, n, thr_d, thr_nd, Vl, W, Vr) diff --git a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f index 47ade8df..6b239cfc 100644 --- a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f +++ b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f @@ -22,6 +22,7 @@ program tc_natorb_bi_ortho call print_energy_and_mos() call save_tc_natorb() + call print_angles_tc() !call minimize_tc_orb_angles() end @@ -35,9 +36,12 @@ subroutine save_tc_natorb() print*,'Saving the natorbs ' provide natorb_tc_leigvec_ao natorb_tc_reigvec_ao + mo_l_coef = natorb_tc_leigvec_ao + mo_r_coef = natorb_tc_reigvec_ao + touch mo_l_coef mo_r_coef - call ezfio_set_bi_ortho_mos_mo_l_coef(natorb_tc_leigvec_ao) - call ezfio_set_bi_ortho_mos_mo_r_coef(natorb_tc_reigvec_ao) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) call save_ref_determinant_nstates_1() call ezfio_set_determinants_read_wf(.False.) diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 588382b5..cc825429 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -402,6 +402,7 @@ subroutine print_energy_and_mos(good_angles) print *, ' TC energy = ', TC_HF_energy print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right + call print_angles_tc() if(max_angle_left_right .lt. thresh_lr_angle) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' From 8a026209082893932c8b96ccca754f6cf3a3a88e Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 12 Oct 2023 16:15:17 +0200 Subject: [PATCH 327/337] minor modif --- src/fci/fci.irp.f | 2 +- src/non_hermit_dav/biorthog.irp.f | 82 ++++++++++++++-- .../lapack_diag_non_hermit.irp.f | 94 ++++++++++++++++++- src/tc_bi_ortho/tc_natorb.irp.f | 7 ++ 4 files changed, 176 insertions(+), 9 deletions(-) diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f index 9de48a01..2059a53b 100644 --- a/src/fci/fci.irp.f +++ b/src/fci/fci.irp.f @@ -42,7 +42,7 @@ program fci write(json_unit,json_array_open_fmt) 'fci' double precision, allocatable :: Ev(:),PT2(:) - allocate(Ev(N_states), PT2(N_state)) + allocate(Ev(N_states), PT2(N_states)) if (do_pt2) then call run_stochastic_cipsi(Ev,PT2) else diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f index da33f75a..13917c5a 100644 --- a/src/non_hermit_dav/biorthog.irp.f +++ b/src/non_hermit_dav/biorthog.irp.f @@ -270,7 +270,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei integer, intent(out) :: n_real_eigv double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) - integer :: i, j + integer :: i, j,k integer :: n_good double precision :: thr, thr_cut, thr_diag, thr_norm double precision :: accu_d, accu_nd @@ -278,6 +278,8 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei integer, allocatable :: list_good(:), iorder(:) double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) double precision, allocatable :: S(:,:) + double precision, allocatable :: phi_1_tilde(:),phi_2_tilde(:),chi_1_tilde(:),chi_2_tilde(:) + allocate(phi_1_tilde(n),phi_2_tilde(n),chi_1_tilde(n),chi_2_tilde(n)) ! ------------------------------------------------------------------------------------- @@ -301,11 +303,78 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei call lapack_diag_non_sym(n, A, WR, WI, VL, VR) !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR) - !print *, ' ' - !print *, ' eigenvalues' - !do i = 1, n - ! write(*, '(1000(F16.10,X))') WR(i), WI(i) - !enddo + + + print *, ' ' + print *, ' eigenvalues' + i = 1 + do while(i .le. n) + write(*, '(I3,X,1000(F16.10,X))')i, WR(i), WI(i) + if(.false.)then + if(WI(i).ne.0.d0)then + print*,'*****************' + print*,'WARNING ! IMAGINARY EIGENVALUES !!!' + write(*, '(1000(F16.10,X))') WR(i), WI(i+1) + ! phi = VR(:,i), psi = VR(:,i+1), |Phi_i> = phi + j psi , |Phi_i+1> = phi - j psi + ! chi = VL(:,i), xhi = VL(:,i+1), |Chi_i> = chi + j xhi , |Chi_i+1> = chi - j xhi + ! + accu_chi_phi = 0.d0 + accu_xhi_psi = 0.d0 + accu_chi_psi = 0.d0 + accu_xhi_phi = 0.d0 + double precision :: accu_chi_phi, accu_xhi_psi, accu_chi_psi, accu_xhi_phi + double precision :: mat_ovlp(2,2),eigval_tmp(2),eigvec(2,2),mat_ovlp_orig(2,2) + do j = 1, n + accu_chi_phi += VL(j,i) * VR(j,i) + accu_xhi_psi += VL(j,i+1) * VR(j,i+1) + accu_chi_psi += VL(j,i) * VR(j,i+1) + accu_xhi_phi += VL(j,i+1) * VR(j,i) + enddo + mat_ovlp_orig(1,1) = accu_chi_phi + mat_ovlp_orig(2,1) = accu_xhi_phi + mat_ovlp_orig(1,2) = accu_chi_psi + mat_ovlp_orig(2,2) = accu_xhi_psi + print*,'old overlap matrix ' + write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,1) + write(*,'(100(F16.10,X))')mat_ovlp_orig(1:2,2) + + + mat_ovlp(1,1) = accu_xhi_phi + mat_ovlp(2,1) = accu_chi_phi + mat_ovlp(1,2) = accu_xhi_psi + mat_ovlp(2,2) = accu_chi_psi + !print*,'accu_chi_phi = ',accu_chi_phi + !print*,'accu_xhi_psi = ',accu_xhi_psi + !print*,'accu_chi_psi = ',accu_chi_psi + !print*,'accu_xhi_phi = ',accu_xhi_phi + print*,'new overlap matrix ' + write(*,'(100(F16.10,X))')mat_ovlp(1:2,1) + write(*,'(100(F16.10,X))')mat_ovlp(1:2,2) + call lapack_diag(eigval_tmp,eigvec,mat_ovlp,2,2) + print*,'eigval_tmp(1) = ',eigval_tmp(1) + print*,'eigvec(1) = ',eigvec(1:2,1) + print*,'eigval_tmp(2) = ',eigval_tmp(2) + print*,'eigvec(2) = ',eigvec(1:2,2) + print*,'*****************' + phi_1_tilde = 0.d0 + phi_2_tilde = 0.d0 + chi_1_tilde = 0.d0 + chi_2_tilde = 0.d0 + do j = 1, n + phi_1_tilde(j) += VR(j,i) * eigvec(1,1) + VR(j,i+1) * eigvec(2,1) + phi_2_tilde(j) += VR(j,i) * eigvec(1,2) + VR(j,i+1) * eigvec(2,2) + chi_1_tilde(j) += VL(j,i+1) * eigvec(1,1) + VL(j,i) * eigvec(2,1) + chi_2_tilde(j) += VL(j,i+1) * eigvec(1,2) + VL(j,i) * eigvec(2,2) + enddo + VR(1:n,i) = phi_1_tilde(1:n) + VR(1:n,i+1) = phi_2_tilde(1:n) +! Vl(1:n,i) = -chi_1_tilde(1:n) +! Vl(1:n,i+1) = chi_2_tilde(1:n) + i+=1 + endif + endif + i+=1 + enddo !print *, ' right eigenvect bef' !do i = 1, n ! write(*, '(1000(F16.10,X))') VR(:,i) @@ -429,6 +498,7 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei ! call impose_orthog_degen_eigvec(n, eigval, reigvec) ! call impose_orthog_degen_eigvec(n, eigval, leigvec) + call reorder_degen_eigvec(n, eigval, leigvec, reigvec) call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f index 6e5719c1..836bf707 100644 --- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -1944,6 +1944,96 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S) end subroutine check_orthog ! --- +subroutine reorder_degen_eigvec(n, e0, L0, R0) + + implicit none + + integer, intent(in) :: n + double precision, intent(in) :: e0(n) + double precision, intent(inout) :: L0(n,n), R0(n,n) + + logical :: complex_root + integer :: i, j, k, m + double precision :: ei, ej, de, de_thr + double precision :: accu_d, accu_nd + integer, allocatable :: deg_num(:) + double precision, allocatable :: L(:,:), R(:,:), S(:,:), S_inv_half(:,:) + + ! --- + + allocate( deg_num(n) ) + do i = 1, n + deg_num(i) = 1 + enddo + + de_thr = thr_degen_tc + + do i = 1, n-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, n + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + do i = 1, n + if(deg_num(i) .gt. 1) then + print *, ' degen on', i, deg_num(i), e0(i) + endif + enddo + + ! --- + + do i = 1, n + m = deg_num(i) + + if(m .gt. 1) then + + allocate(L(n,m)) + allocate(R(n,m),S(m,m)) + + do j = 1, m + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + enddo + + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , L, size(L, 1), R, size(R, 1) & + , 0.d0, S, size(S, 1) ) + print*,'Overlap matrix ' + accu_nd = 0.D0 + do j = 1, m + write(*,'(100(F16.10,X))')S(1:m,j) + do k = 1, m + if(j==k)cycle + accu_nd += dabs(S(j,k)) + enddo + enddo + print*,'accu_nd = ',accu_nd +! if(accu_nd .gt.1.d-10)then +! stop +! endif + do j = 1, m + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + + deallocate(L, R,S) + + endif + enddo + +end subroutine reorder_degen_eigvec subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) @@ -2030,7 +2120,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) !call bi_ortho_s_inv_half(m, L, R, S_inv_half) !deallocate(S, S_inv_half) -! call impose_biorthog_svd(n, m, L, R) + call impose_biorthog_svd(n, m, L, R) ! call impose_biorthog_inverse(n, m, L, R) !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) @@ -2046,7 +2136,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) endif enddo - call impose_biorthog_inverse(n, n, L0, R0) +! call impose_biorthog_inverse(n, n, L0, R0) end subroutine impose_biorthog_degen_eigvec diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index a72d356a..17238231 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -32,6 +32,13 @@ thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 + do i = 1, mo_num + do j = 1, mo_num + if(dabs(dm_tmp(j,i)).lt.thr_d)then + dm_tmp(j,i) = 0.d0 + endif + enddo + enddo ! if(n_core_orb.ne.0)then ! call diag_mat_per_fock_degen_core( fock_diag, dm_tmp, list_core, n_core_orb, mo_num, thr_d, thr_nd, thr_deg & ! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) From a4a799837bc7dba8ca2c7e39abc6716056cf5de7 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 13 Oct 2023 15:10:51 +0200 Subject: [PATCH 328/337] Fix natorb with numerical integrals of Jastrow --- configure | 19 +++++++++++++++++-- src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f | 15 ++++++++++++++- src/tc_bi_ortho/tc_natorb.irp.f | 6 ++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/configure b/configure index c328c4f0..3ccdf37b 100755 --- a/configure +++ b/configure @@ -211,6 +211,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + rm -rf trexio-${VERSION} tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} --without-hdf5 CFLAGS='-g' @@ -224,6 +225,7 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/trexio/releases/download/v${VERSION}/trexio-${VERSION}.tar.gz + rm -rf trexio-${VERSION} tar -zxf trexio-${VERSION}.tar.gz && rm trexio-${VERSION}.tar.gz cd trexio-${VERSION} ./configure --prefix=\${QP_ROOT} CFLAGS="-g" @@ -235,11 +237,24 @@ EOF execute << EOF cd "\${QP_ROOT}"/external wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz + rm -rf qmckl-${VERSION} tar -zxf qmckl-${VERSION}.tar.gz && rm qmckl-${VERSION}.tar.gz cd qmckl-${VERSION} ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc CFLAGS='-g' make && make -j 4 check && make install EOF + elif [[ ${PACKAGE} = qmckl-intel ]] ; then + + VERSION=0.5.4 + execute << EOF + cd "\${QP_ROOT}"/external + wget https://github.com/TREX-CoE/qmckl/releases/download/v${VERSION}/qmckl-${VERSION}.tar.gz + rm -rf qmckl-${VERSION} + tar -zxf qmckl-${VERSION}.tar.gz && rm qmckl-${VERSION}.tar.gz + cd qmckl-${VERSION} + ./configure --prefix=\${QP_ROOT} --enable-hpc --disable-doc --with-icc --with-ifort CFLAGS='-g' + make && make -j 4 check && make install +EOF elif [[ ${PACKAGE} = gmp ]] ; then @@ -378,13 +393,13 @@ fi TREXIO=$(find_lib -ltrexio) if [[ ${TREXIO} = $(not_found) ]] ; then - error "TREXIO (trexio,trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" + error "TREXIO (trexio | trexio-nohdf5) is not installed. If you don't have HDF5, use trexio-nohdf5" fail fi QMCKL=$(find_lib -lqmckl) if [[ ${QMCKL} = $(not_found) ]] ; then - error "QMCkl (qmckl) is not installed." + error "QMCkl (qmckl | qmckl-intel) is not installed." fail fi diff --git a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f index 6b239cfc..ffcd9b22 100644 --- a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f +++ b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f @@ -15,7 +15,20 @@ program tc_natorb_bi_ortho PROVIDE tc_grid1_a tc_grid1_r my_n_pt_r_grid = tc_grid1_r my_n_pt_a_grid = tc_grid1_a - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + if(j1b_type .ge. 100) then + my_extra_grid_becke = .True. + PROVIDE tc_grid2_a tc_grid2_r + my_n_pt_r_extra_grid = tc_grid2_r + my_n_pt_a_extra_grid = tc_grid2_a + touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid + + call write_int(6, my_n_pt_r_extra_grid, 'radial internal grid over') + call write_int(6, my_n_pt_a_extra_grid, 'angular internal grid over') + endif + + read_wf = .True. touch read_wf diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index a72d356a..50f448d6 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -29,6 +29,12 @@ write(*, '(100(F16.10,X))') -dm_tmp(:,i) enddo + print *, ' Transition density matrix AO' + do i = 1, ao_num + write(*, '(100(F16.10,X))') tc_transition_matrix_ao(:,i,1,1) + enddo + stop + thr_d = 1.d-6 thr_nd = 1.d-6 thr_deg = 1.d-3 From 676d5c3a7366bb11e98d6f42905e52904f986e86 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 15 Oct 2023 14:01:49 +0200 Subject: [PATCH 329/337] Fixed missing variables in openmp block --- src/ao_one_e_ints/kin_ao_ints.irp.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f index a5ee0670..3a97d095 100644 --- a/src/ao_one_e_ints/kin_ao_ints.irp.f +++ b/src/ao_one_e_ints/kin_ao_ints.irp.f @@ -52,7 +52,7 @@ !$OMP DEFAULT(NONE) & !$OMP PRIVATE(A_center,B_center,power_A,power_B,& !$OMP overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & + !$OMP alpha, beta, n, l, i,j,c,d_a_2,d_2,deriv_tmp, & !$OMP overlap_x0,overlap_y0,overlap_z0) & !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & From 8b34372baa47feb8f4dfb350eb6d24cbc08e1d61 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2023 16:18:58 +0200 Subject: [PATCH 330/337] Merged erf modules, and moved mu_erf into hamiltonian module --- scripts/module/module_handler.py | 4 +--- src/ao_tc_eff_map/NEED | 2 +- src/ao_two_e_erf_ints/EZFIO.cfg | 13 ------------ src/ao_two_e_erf_ints/NEED | 1 - src/ao_two_e_erf_ints/README.rst | 19 ------------------ src/ao_two_e_ints/EZFIO.cfg | 7 +++++++ src/ao_two_e_ints/NEED | 1 + .../integrals_erf_in_map_slave.irp.f | 0 .../map_integrals_erf.irp.f | 0 .../providers_ao_erf.irp.f | 0 .../routines_save_integrals_erf.irp.f | 0 .../two_e_integrals_erf.irp.f | 0 src/dft_one_e/NEED | 2 -- src/dummy/NEED | 3 +-- src/hamiltonian/EZFIO.cfg | 8 ++++++++ src/hamiltonian/NEED | 0 src/hamiltonian/README.rst | 5 +++++ src/mo_two_e_erf_ints/EZFIO.cfg | 6 ------ src/mo_two_e_erf_ints/NEED | 3 --- src/mo_two_e_erf_ints/README.rst | 20 ------------------- src/mo_two_e_ints/EZFIO.cfg | 7 +++++++ .../core_quantities_erf.irp.f | 0 .../ints_erf_3_index.irp.f | 0 .../map_integrals_erf.irp.f | 0 .../mo_bi_integrals_erf.irp.f | 0 .../routines_save_integrals_erf.irp.f | 0 src/tools/NEED | 1 - 27 files changed, 31 insertions(+), 71 deletions(-) delete mode 100644 src/ao_two_e_erf_ints/EZFIO.cfg delete mode 100644 src/ao_two_e_erf_ints/NEED delete mode 100644 src/ao_two_e_erf_ints/README.rst rename src/{ao_two_e_erf_ints => ao_two_e_ints}/integrals_erf_in_map_slave.irp.f (100%) rename src/{ao_two_e_erf_ints => ao_two_e_ints}/map_integrals_erf.irp.f (100%) rename src/{ao_two_e_erf_ints => ao_two_e_ints}/providers_ao_erf.irp.f (100%) rename src/{ao_two_e_erf_ints => ao_two_e_ints}/routines_save_integrals_erf.irp.f (100%) rename src/{ao_two_e_erf_ints => ao_two_e_ints}/two_e_integrals_erf.irp.f (100%) create mode 100644 src/hamiltonian/EZFIO.cfg create mode 100644 src/hamiltonian/NEED create mode 100644 src/hamiltonian/README.rst delete mode 100644 src/mo_two_e_erf_ints/EZFIO.cfg delete mode 100644 src/mo_two_e_erf_ints/NEED delete mode 100644 src/mo_two_e_erf_ints/README.rst rename src/{mo_two_e_erf_ints => mo_two_e_ints}/core_quantities_erf.irp.f (100%) rename src/{mo_two_e_erf_ints => mo_two_e_ints}/ints_erf_3_index.irp.f (100%) rename src/{mo_two_e_erf_ints => mo_two_e_ints}/map_integrals_erf.irp.f (100%) rename src/{mo_two_e_erf_ints => mo_two_e_ints}/mo_bi_integrals_erf.irp.f (100%) rename src/{mo_two_e_erf_ints => mo_two_e_ints}/routines_save_integrals_erf.irp.f (100%) diff --git a/scripts/module/module_handler.py b/scripts/module/module_handler.py index fbdee171..43030fc8 100755 --- a/scripts/module/module_handler.py +++ b/scripts/module/module_handler.py @@ -115,9 +115,7 @@ def get_l_module_descendant(d_child, l_module): except KeyError: print("Error: ", file=sys.stderr) print("`{0}` is not a submodule".format(module), file=sys.stderr) - print("Check the typo (spelling, case, '/', etc.) ", file=sys.stderr) -# pass - sys.exit(1) + raise return list(set(l)) diff --git a/src/ao_tc_eff_map/NEED b/src/ao_tc_eff_map/NEED index d9edb325..f768b75f 100644 --- a/src/ao_tc_eff_map/NEED +++ b/src/ao_tc_eff_map/NEED @@ -1,4 +1,4 @@ -ao_two_e_erf_ints +ao_two_e_ints mo_one_e_ints ao_many_one_e_ints dft_utils_in_r diff --git a/src/ao_two_e_erf_ints/EZFIO.cfg b/src/ao_two_e_erf_ints/EZFIO.cfg deleted file mode 100644 index 0af0e1d8..00000000 --- a/src/ao_two_e_erf_ints/EZFIO.cfg +++ /dev/null @@ -1,13 +0,0 @@ -[io_ao_two_e_integrals_erf] -type: Disk_access -doc: Read/Write |AO| integrals with the long range interaction from/to disk [ Write | Read | None ] -interface: ezfio,provider,ocaml -default: None - -[mu_erf] -type: double precision -doc: cutting of the interaction in the range separated model -interface: ezfio,provider,ocaml -default: 0.5 -ezfio_name: mu_erf - diff --git a/src/ao_two_e_erf_ints/NEED b/src/ao_two_e_erf_ints/NEED deleted file mode 100644 index b30cc39d..00000000 --- a/src/ao_two_e_erf_ints/NEED +++ /dev/null @@ -1 +0,0 @@ -ao_two_e_ints diff --git a/src/ao_two_e_erf_ints/README.rst b/src/ao_two_e_erf_ints/README.rst deleted file mode 100644 index 45c72b84..00000000 --- a/src/ao_two_e_erf_ints/README.rst +++ /dev/null @@ -1,19 +0,0 @@ -====================== -ao_two_e_erf_ints -====================== - -Here, all two-electron integrals (:math:`erf(\mu r_{12})/r_{12}`) are computed. -As they have 4 indices and many are zero, they are stored in a map, as defined -in :file:`utils/map_module.f90`. - -The main parameter of this module is :option:`ao_two_e_erf_ints mu_erf` which is the range-separation parameter. - -To fetch an |AO| integral, use the -`get_ao_two_e_integral_erf(i,j,k,l,ao_integrals_erf_map)` function. - - -The conventions are: -* For |AO| integrals : (ij|kl) = (11|22) = = <12|12> - - - diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index 9c017813..a489516e 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -35,3 +35,10 @@ type: logical doc: Perform Cholesky decomposition of AO integrals interface: ezfio,provider,ocaml default: False + +[io_ao_two_e_integrals_erf] +type: Disk_access +doc: Read/Write |AO| erf integrals from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + diff --git a/src/ao_two_e_ints/NEED b/src/ao_two_e_ints/NEED index ffc5e8be..542962ec 100644 --- a/src/ao_two_e_ints/NEED +++ b/src/ao_two_e_ints/NEED @@ -1,3 +1,4 @@ +hamiltonian ao_one_e_ints pseudo bitmask diff --git a/src/ao_two_e_erf_ints/integrals_erf_in_map_slave.irp.f b/src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f similarity index 100% rename from src/ao_two_e_erf_ints/integrals_erf_in_map_slave.irp.f rename to src/ao_two_e_ints/integrals_erf_in_map_slave.irp.f diff --git a/src/ao_two_e_erf_ints/map_integrals_erf.irp.f b/src/ao_two_e_ints/map_integrals_erf.irp.f similarity index 100% rename from src/ao_two_e_erf_ints/map_integrals_erf.irp.f rename to src/ao_two_e_ints/map_integrals_erf.irp.f diff --git a/src/ao_two_e_erf_ints/providers_ao_erf.irp.f b/src/ao_two_e_ints/providers_ao_erf.irp.f similarity index 100% rename from src/ao_two_e_erf_ints/providers_ao_erf.irp.f rename to src/ao_two_e_ints/providers_ao_erf.irp.f diff --git a/src/ao_two_e_erf_ints/routines_save_integrals_erf.irp.f b/src/ao_two_e_ints/routines_save_integrals_erf.irp.f similarity index 100% rename from src/ao_two_e_erf_ints/routines_save_integrals_erf.irp.f rename to src/ao_two_e_ints/routines_save_integrals_erf.irp.f diff --git a/src/ao_two_e_erf_ints/two_e_integrals_erf.irp.f b/src/ao_two_e_ints/two_e_integrals_erf.irp.f similarity index 100% rename from src/ao_two_e_erf_ints/two_e_integrals_erf.irp.f rename to src/ao_two_e_ints/two_e_integrals_erf.irp.f diff --git a/src/dft_one_e/NEED b/src/dft_one_e/NEED index 615ee97e..667859a5 100644 --- a/src/dft_one_e/NEED +++ b/src/dft_one_e/NEED @@ -4,6 +4,4 @@ mo_one_e_ints mo_two_e_ints ao_one_e_ints ao_two_e_ints -mo_two_e_erf_ints -ao_two_e_erf_ints mu_of_r diff --git a/src/dummy/NEED b/src/dummy/NEED index 3d5eb1f7..1dcb7a25 100644 --- a/src/dummy/NEED +++ b/src/dummy/NEED @@ -1,6 +1,5 @@ ao_basis ao_one_e_ints -ao_two_e_erf_ints ao_two_e_ints aux_quantities becke_numerical_grid @@ -24,13 +23,13 @@ functionals generators_cas generators_full hartree_fock +hamiltonian iterations kohn_sham kohn_sham_rs mo_basis mo_guess mo_one_e_ints -mo_two_e_erf_ints mo_two_e_ints mpi nuclei diff --git a/src/hamiltonian/EZFIO.cfg b/src/hamiltonian/EZFIO.cfg new file mode 100644 index 00000000..672bfdfa --- /dev/null +++ b/src/hamiltonian/EZFIO.cfg @@ -0,0 +1,8 @@ +[mu_erf] +type: double precision +doc: cutting of the interaction in the range separated model +interface: ezfio,provider,ocaml +default: 0.5 +ezfio_name: mu_erf + + diff --git a/src/hamiltonian/NEED b/src/hamiltonian/NEED new file mode 100644 index 00000000..e69de29b diff --git a/src/hamiltonian/README.rst b/src/hamiltonian/README.rst new file mode 100644 index 00000000..c237f8d2 --- /dev/null +++ b/src/hamiltonian/README.rst @@ -0,0 +1,5 @@ +=========== +hamiltonian +=========== + +Parameters of the Hamiltonian. diff --git a/src/mo_two_e_erf_ints/EZFIO.cfg b/src/mo_two_e_erf_ints/EZFIO.cfg deleted file mode 100644 index 57137e65..00000000 --- a/src/mo_two_e_erf_ints/EZFIO.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[io_mo_two_e_integrals_erf] -type: Disk_access -doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] -interface: ezfio,provider,ocaml -default: None - diff --git a/src/mo_two_e_erf_ints/NEED b/src/mo_two_e_erf_ints/NEED deleted file mode 100644 index 7adb17a1..00000000 --- a/src/mo_two_e_erf_ints/NEED +++ /dev/null @@ -1,3 +0,0 @@ -ao_two_e_erf_ints -mo_two_e_ints -mo_basis diff --git a/src/mo_two_e_erf_ints/README.rst b/src/mo_two_e_erf_ints/README.rst deleted file mode 100644 index b118e0c7..00000000 --- a/src/mo_two_e_erf_ints/README.rst +++ /dev/null @@ -1,20 +0,0 @@ -====================== -mo_two_e_erf_ints -====================== - -Here, all two-electron integrals (:math:`erf({\mu}_{erf} * r_{12})/r_{12}`) are computed. -As they have 4 indices and many are zero, they are stored in a map, as defined -in :file:`Utils/map_module.f90`. - -The range separation parameter :math:`{\mu}_{erf}` is the variable :option:`ao_two_e_erf_ints mu_erf`. - -To fetch an |MO| integral, use -`get_mo_two_e_integral_erf(i,j,k,l,mo_integrals_map_erf)` - -The conventions are: - -* For |MO| integrals : = <12|12> - -Be aware that it might not be the same conventions for |MO| and |AO| integrals. - - diff --git a/src/mo_two_e_ints/EZFIO.cfg b/src/mo_two_e_ints/EZFIO.cfg index ea47c51c..088a2416 100644 --- a/src/mo_two_e_ints/EZFIO.cfg +++ b/src/mo_two_e_ints/EZFIO.cfg @@ -17,3 +17,10 @@ doc: If `True`, computes all integrals except for the integrals having 3 or 4 vi interface: ezfio,provider,ocaml default: false +[io_mo_two_e_integrals_erf] +type: Disk_access +doc: Read/Write MO integrals with the long range interaction from/to disk [ Write | Read | None ] +interface: ezfio,provider,ocaml +default: None + + diff --git a/src/mo_two_e_erf_ints/core_quantities_erf.irp.f b/src/mo_two_e_ints/core_quantities_erf.irp.f similarity index 100% rename from src/mo_two_e_erf_ints/core_quantities_erf.irp.f rename to src/mo_two_e_ints/core_quantities_erf.irp.f diff --git a/src/mo_two_e_erf_ints/ints_erf_3_index.irp.f b/src/mo_two_e_ints/ints_erf_3_index.irp.f similarity index 100% rename from src/mo_two_e_erf_ints/ints_erf_3_index.irp.f rename to src/mo_two_e_ints/ints_erf_3_index.irp.f diff --git a/src/mo_two_e_erf_ints/map_integrals_erf.irp.f b/src/mo_two_e_ints/map_integrals_erf.irp.f similarity index 100% rename from src/mo_two_e_erf_ints/map_integrals_erf.irp.f rename to src/mo_two_e_ints/map_integrals_erf.irp.f diff --git a/src/mo_two_e_erf_ints/mo_bi_integrals_erf.irp.f b/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f similarity index 100% rename from src/mo_two_e_erf_ints/mo_bi_integrals_erf.irp.f rename to src/mo_two_e_ints/mo_bi_integrals_erf.irp.f diff --git a/src/mo_two_e_erf_ints/routines_save_integrals_erf.irp.f b/src/mo_two_e_ints/routines_save_integrals_erf.irp.f similarity index 100% rename from src/mo_two_e_erf_ints/routines_save_integrals_erf.irp.f rename to src/mo_two_e_ints/routines_save_integrals_erf.irp.f diff --git a/src/tools/NEED b/src/tools/NEED index 0f4e17b0..ea465e92 100644 --- a/src/tools/NEED +++ b/src/tools/NEED @@ -1,5 +1,4 @@ fci -mo_two_e_erf_ints aux_quantities hartree_fock two_body_rdm From ad498b073e9103d1c0fdd4f17426c274aacefce8 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2023 16:29:28 +0200 Subject: [PATCH 331/337] Added use_only_lr for long-range only integrals --- src/ao_two_e_ints/EZFIO.cfg | 5 +++++ src/ao_two_e_ints/two_e_integrals.irp.f | 16 ++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/ao_two_e_ints/EZFIO.cfg b/src/ao_two_e_ints/EZFIO.cfg index a489516e..ff932b0c 100644 --- a/src/ao_two_e_ints/EZFIO.cfg +++ b/src/ao_two_e_ints/EZFIO.cfg @@ -42,3 +42,8 @@ doc: Read/Write |AO| erf integrals from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml default: None +[use_only_lr] +type: logical +doc: If true, use only the long range part of the two-electron integrals instead of 1/r12 +interface: ezfio, provider, ocaml +default: False diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 148ebb62..b55b5f0d 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -21,9 +21,9 @@ double precision function ao_two_e_integral(i, j, k, l) double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq - double precision :: ao_two_e_integral_schwartz_accel - - double precision :: ao_two_e_integral_cosgtos + double precision, external :: ao_two_e_integral_erf + double precision, external :: ao_two_e_integral_cosgtos + double precision, external :: ao_two_e_integral_schwartz_accel if(use_cosgtos) then @@ -31,13 +31,15 @@ double precision function ao_two_e_integral(i, j, k, l) ao_two_e_integral = ao_two_e_integral_cosgtos(i, j, k, l) - else + else if (use_only_lr) then - if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then + ao_two_e_integral = ao_two_e_integral_erf(i, j, k, l) + + else if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) - else + else dim1 = n_pt_max_integrals @@ -117,8 +119,6 @@ double precision function ao_two_e_integral(i, j, k, l) enddo ! q enddo ! p - endif - endif endif From 14d5268d1b986654c90d328c293b0d28ec6a05fa Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Mon, 16 Oct 2023 16:37:08 +0200 Subject: [PATCH 332/337] Fixing compilation --- src/ao_two_e_ints/providers_ao_erf.irp.f | 2 +- src/ao_two_e_ints/routines_save_integrals_erf.irp.f | 2 +- src/dft_one_e/mu_erf_dft.irp.f | 2 +- src/mo_two_e_ints/mo_bi_integrals_erf.irp.f | 2 +- src/mo_two_e_ints/routines_save_integrals_erf.irp.f | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ao_two_e_ints/providers_ao_erf.irp.f b/src/ao_two_e_ints/providers_ao_erf.irp.f index 293df29f..ddc1ec45 100644 --- a/src/ao_two_e_ints/providers_ao_erf.irp.f +++ b/src/ao_two_e_ints/providers_ao_erf.irp.f @@ -90,7 +90,7 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ] if (write_ao_two_e_integrals_erf) then call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map) - call ezfio_set_ao_two_e_erf_ints_io_ao_two_e_integrals_erf("Read") + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals_erf("Read") endif END_PROVIDER diff --git a/src/ao_two_e_ints/routines_save_integrals_erf.irp.f b/src/ao_two_e_ints/routines_save_integrals_erf.irp.f index 4b0cfad0..d980bc05 100644 --- a/src/ao_two_e_ints/routines_save_integrals_erf.irp.f +++ b/src/ao_two_e_ints/routines_save_integrals_erf.irp.f @@ -4,7 +4,7 @@ subroutine save_erf_two_e_integrals_ao PROVIDE ao_two_e_integrals_erf_in_map call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map) - call ezfio_set_ao_two_e_erf_ints_io_ao_two_e_integrals_erf('Read') + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals_erf('Read') end subroutine save_erf_two_e_ints_ao_into_ints_ao diff --git a/src/dft_one_e/mu_erf_dft.irp.f b/src/dft_one_e/mu_erf_dft.irp.f index 0b870564..08779f0e 100644 --- a/src/dft_one_e/mu_erf_dft.irp.f +++ b/src/dft_one_e/mu_erf_dft.irp.f @@ -3,7 +3,7 @@ BEGIN_PROVIDER [double precision, mu_erf_dft] BEGIN_DOC ! range separation parameter used in RS-DFT. ! -! It is set to mu_erf in order to be consistent with the module "ao_two_e_erf_ints" +! It is set to mu_erf in order to be consistent with the module "hamiltonian" END_DOC mu_erf_dft = mu_erf diff --git a/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f b/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f index e009b7d9..e7765d71 100644 --- a/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f @@ -55,7 +55,7 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_erf_in_map ] if (write_mo_two_e_integrals_erf) then call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_erf',mo_integrals_erf_map) - call ezfio_set_mo_two_e_erf_ints_io_mo_two_e_integrals_erf("Read") + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals_erf("Read") endif END_PROVIDER diff --git a/src/mo_two_e_ints/routines_save_integrals_erf.irp.f b/src/mo_two_e_ints/routines_save_integrals_erf.irp.f index 52fb8f63..9915b206 100644 --- a/src/mo_two_e_ints/routines_save_integrals_erf.irp.f +++ b/src/mo_two_e_ints/routines_save_integrals_erf.irp.f @@ -4,7 +4,7 @@ subroutine save_erf_two_e_integrals_mo PROVIDE mo_two_e_integrals_erf_in_map call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/mo_ints_erf',mo_integrals_erf_map) - call ezfio_set_mo_two_e_erf_ints_io_mo_two_e_integrals_erf('Read') + call ezfio_set_mo_two_e_ints_io_mo_two_e_integrals_erf('Read') end From d4d42f2851f815ff520f78c3bcd7615b8240dcc2 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Oct 2023 17:52:43 +0200 Subject: [PATCH 333/337] Fixing tests --- src/kohn_sham_rs/61.rsks.bats | 2 +- src/tc_scf/11.tc_scf.bats | 46 +++++++++++++++++------------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/kohn_sham_rs/61.rsks.bats b/src/kohn_sham_rs/61.rsks.bats index 90b82142..29d43c3b 100644 --- a/src/kohn_sham_rs/61.rsks.bats +++ b/src/kohn_sham_rs/61.rsks.bats @@ -13,7 +13,7 @@ function run() { qp set scf_utils thresh_scf 1.e-10 qp set dft_keywords exchange_functional $functional qp set dft_keywords correlation_functional $functional - qp set ao_two_e_erf_ints mu_erf 0.5 + qp set hamiltonian mu_erf 0.5 qp set becke_numerical_grid grid_type_sgn 1 qp_reset --mos $1 qp run rs_ks_scf diff --git a/src/tc_scf/11.tc_scf.bats b/src/tc_scf/11.tc_scf.bats index 91b52540..b81c2f4b 100644 --- a/src/tc_scf/11.tc_scf.bats +++ b/src/tc_scf/11.tc_scf.bats @@ -8,15 +8,15 @@ function run_Ne() { rm -rf Ne_tc_scf echo Ne > Ne.xyz qp create_ezfio -b cc-pcvdz Ne.xyz -o Ne_tc_scf - qp run scf + qp run scf - qp set ao_two_e_erf_ints mu_erf 0.87 + qp set hamiltonian mu_erf 0.87 qp set tc_keywords j1b_type 3 - qp set tc_keywords j1b_pen [1.5] - qp set tc_keywords bi_ortho True + qp set tc_keywords j1b_pen [1.5] + qp set tc_keywords bi_ortho True qp set tc_keywords test_cycle_tc True - qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out eref=-128.552134 energy="$(qp get tc_scf bitc_energy)" eq $energy $eref 1e-6 @@ -24,22 +24,22 @@ function run_Ne() { @test "Ne" { - run_Ne + run_Ne } function run_C() { rm -rf C_tc_scf echo C > C.xyz qp create_ezfio -b cc-pcvdz C.xyz -o C_tc_scf -m 3 - qp run scf + qp run scf - qp set ao_two_e_erf_ints mu_erf 0.87 + qp set hamiltonian mu_erf 0.87 qp set tc_keywords j1b_type 3 - qp set tc_keywords j1b_pen [1.5] - qp set tc_keywords bi_ortho True + qp set tc_keywords j1b_pen [1.5] + qp set tc_keywords bi_ortho True qp set tc_keywords test_cycle_tc True - qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out eref=-37.691254356408791 energy="$(qp get tc_scf bitc_energy)" eq $energy $eref 1e-6 @@ -47,7 +47,7 @@ function run_C() { @test "C" { - run_C + run_C } @@ -55,15 +55,15 @@ function run_O() { rm -rf O_tc_scf echo O > O.xyz qp create_ezfio -b cc-pcvdz O.xyz -o O_tc_scf -m 3 - qp run scf + qp run scf - qp set ao_two_e_erf_ints mu_erf 0.87 + qp set hamiltonian mu_erf 0.87 qp set tc_keywords j1b_type 3 - qp set tc_keywords j1b_pen [1.5] - qp set tc_keywords bi_ortho True + qp set tc_keywords j1b_pen [1.5] + qp set tc_keywords bi_ortho True qp set tc_keywords test_cycle_tc True - qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out eref=-74.814687229354590 energy="$(qp get tc_scf bitc_energy)" eq $energy $eref 1e-6 @@ -71,7 +71,7 @@ function run_O() { @test "O" { - run_O + run_O } @@ -79,16 +79,16 @@ function run_O() { function run_ch2() { rm -rf ch2_tc_scf cp ${QP_ROOT}/tests/input/ch2.xyz . - qp create_ezfio -b "C:cc-pcvdz|H:cc-pvdz" ch2.xyz -o ch2_tc_scf - qp run scf + qp create_ezfio -b "C:cc-pcvdz|H:cc-pvdz" ch2.xyz -o ch2_tc_scf + qp run scf - qp set ao_two_e_erf_ints mu_erf 0.87 + qp set hamiltonian mu_erf 0.87 qp set tc_keywords j1b_type 3 qp set tc_keywords j1b_pen '[1.5,10000,10000]' qp set tc_keywords bi_ortho True qp set tc_keywords test_cycle_tc True - qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out + qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out eref=-38.903247818077737 energy="$(qp get tc_scf bitc_energy)" eq $energy $eref 1e-6 @@ -96,6 +96,6 @@ function run_ch2() { @test "ch2" { - run_ch2 + run_ch2 } From 16565bbda4955d024d758e78e3fda592f834337c Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 17 Oct 2023 23:28:08 +0200 Subject: [PATCH 334/337] Fixing tests --- src/basis_correction/51.basis_c.bats | 4 ++-- src/casscf_cipsi/50.casscf.bats | 4 ++-- src/cis/20.cis.bats | 10 +++++----- src/cisd/30.cisd.bats | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/basis_correction/51.basis_c.bats b/src/basis_correction/51.basis_c.bats index 2682361b..914b482b 100644 --- a/src/basis_correction/51.basis_c.bats +++ b/src/basis_correction/51.basis_c.bats @@ -10,8 +10,8 @@ function run() { qp set perturbation do_pt2 False qp set determinants n_det_max 8000 qp set determinants n_states 1 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 8 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 8 qp run fci energy1="$(ezfio get fci energy | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh diff --git a/src/casscf_cipsi/50.casscf.bats b/src/casscf_cipsi/50.casscf.bats index a0db725d..9f63dfe2 100644 --- a/src/casscf_cipsi/50.casscf.bats +++ b/src/casscf_cipsi/50.casscf.bats @@ -9,8 +9,8 @@ function run_stoch() { test_exe casscf || skip qp set perturbation do_pt2 True qp set determinants n_det_max $3 - qp set davidson threshold_davidson 1.e-10 - qp set davidson n_states_diag 4 + qp set davidson_keywords threshold_davidson 1.e-10 + qp set davidson_keywords n_states_diag 4 qp run casscf | tee casscf.out energy1="$(ezfio get casscf energy_pt2 | tr '[]' ' ' | cut -d ',' -f 1)" eq $energy1 $1 $thresh diff --git a/src/cis/20.cis.bats b/src/cis/20.cis.bats index 4f255c7b..4a5c6e45 100644 --- a/src/cis/20.cis.bats +++ b/src/cis/20.cis.bats @@ -9,7 +9,7 @@ function run() { qp set_file $1 qp edit --check qp set determinants n_states 3 - qp set davidson threshold_davidson 1.e-12 + qp set davidson_keywords threshold_davidson 1.e-12 qp set mo_two_e_ints io_mo_two_e_integrals Write qp set_frozen_core qp run cis @@ -59,7 +59,7 @@ function run() { @test "ClO" { # 1.65582s 2.06465s [[ -n $TRAVIS ]] && skip - run clo.ezfio -534.263560525680 -534.256601571199 -534.062020844428 + run clo.ezfio -534.2635737789097 -534.2566081298855 -534.0620070783308 } @test "SO" { # 1.9667s 2.91234s @@ -69,7 +69,7 @@ function run() { @test "OH" { # 2.201s 2.65573s [[ -n $TRAVIS ]] && skip - run oh.ezfio -75.4314648243896 -75.4254639668256 -75.2707675632313 + run oh.ezfio -75.4314822573358 -75.4254733392003 -75.2707586997333 } @test "H2O2" { # 2.27079s 3.07875s @@ -109,7 +109,7 @@ function run() { @test "DHNO" { # 6.42976s 12.9899s [[ -n $TRAVIS ]] && skip - run dhno.ezfio -130.4472288472718 -130.3571808164850 -130.2196257046987 + run dhno.ezfio -130.447238897118 -130.357186843611 -130.219626716369 } @test "CH4" { # 6.4969s 10.9157s @@ -129,7 +129,7 @@ function run() { @test "[Cu(NH3)4]2+" { # 29.7711s 3.45478m [[ -n ${TRAVIS} ]] && skip - run cu_nh3_4_2plus.ezfio -1862.97958885180 -1862.92457657404 -1862.91134959451 + run cu_nh3_4_2plus.ezfio -1862.97958844302 -1862.92454785007 -1862.91130869967 } diff --git a/src/cisd/30.cisd.bats b/src/cisd/30.cisd.bats index 6b8fddb6..fefc3e6d 100644 --- a/src/cisd/30.cisd.bats +++ b/src/cisd/30.cisd.bats @@ -8,8 +8,8 @@ function run() { test_exe cisd || skip qp edit --check qp set determinants n_states 2 - qp set davidson threshold_davidson 1.e-12 - qp set davidson n_states_diag 24 + qp set davidson_keywords threshold_davidson 1.e-12 + qp set davidson_keywords n_states_diag 24 qp run cis qp run cisd energy1="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 1)" From 50800f41c39eb45901a91d458840cc7953f73b49 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 18 Oct 2023 00:13:10 +0200 Subject: [PATCH 335/337] Fixing tests --- src/cisd/30.cisd.bats | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/cisd/30.cisd.bats b/src/cisd/30.cisd.bats index fefc3e6d..5ec11e4b 100644 --- a/src/cisd/30.cisd.bats +++ b/src/cisd/30.cisd.bats @@ -10,8 +10,7 @@ function run() { qp set determinants n_states 2 qp set davidson_keywords threshold_davidson 1.e-12 qp set davidson_keywords n_states_diag 24 - qp run cis - qp run cisd + qp run cisd energy1="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 1)" energy2="$(qp get cisd energy | tr '[]' ' ' | cut -d ',' -f 2)" eq $energy1 $1 $thresh @@ -19,7 +18,7 @@ function run() { } -@test "B-B" { # +@test "B-B" { # qp set_file b2_stretched.ezfio qp set_frozen_core run -49.120607088648597 -49.055152453388231 @@ -34,7 +33,7 @@ function run() { @test "HBO" { # 4.42968s 19.6099s qp set_file hbo.ezfio qp set_frozen_core - run -100.2019254455993 -99.79484127741013 + run -100.2019254455993 -99.79484127741013 } @test "HCO" { # 6.6077s 28.6801s @@ -46,7 +45,7 @@ function run() { @test "H2O" { # 7.0651s 30.6642s qp set_file h2o.ezfio qp set_frozen_core - run -76.22975602077072 -75.80609108747208 + run -76.22975602077072 -75.80609108747208 } @@ -78,7 +77,7 @@ function run() { [[ -n $TRAVIS ]] && skip qp set_file oh.ezfio qp set_frozen_core - run -75.6087472926588 -75.5370393736601 + run -75.6088105201621 -75.5370802925698 } @test "CH4" { # 19.821s 1.38648m @@ -105,8 +104,9 @@ function run() { @test "DHNO" { # 24.7077s 1.46487m [[ -n $TRAVIS ]] && skip qp set_file dhno.ezfio - qp set_mo_class --core="[1-7]" --act="[8-64]" - run -130.458814562403 -130.356308303681 + qp set_mo_class --core="[1-7]" --act="[8-64]" + run -130.4659881027444 -130.2692384198501 +# run -130.458814562403 -130.356308303681 } @test "H3COH" { # 24.7248s 1.85043m @@ -120,7 +120,7 @@ function run() { [[ -n $TRAVIS ]] && skip qp set_file cu_nh3_4_2plus.ezfio qp set_mo_class --core="[1-24]" --act="[25-45]" --del="[46-87]" - run -1862.98689579931 -1862.6883044626563 + run -1862.98310702274 -1862.88506319755 } @@ -135,14 +135,14 @@ function run() { [[ -n $TRAVIS ]] && skip qp set_file c2h2.ezfio qp set_mo_class --act="[1-30]" --del="[31-36]" - run -12.3566731164213 -11.9495394759914 + run -12.3566731164213 -11.9495394759914 } @test "ClO" { # 37.6949s [[ -n $TRAVIS ]] && skip qp set_file clo.ezfio qp set_frozen_core - run -534.5404021326773 -534.3818725793897 + run -534.540464615019 -534.381904487587 } @test "F2" { # 45.2078s @@ -155,7 +155,7 @@ function run() { @test "SO2" { # 47.6922s [[ -n $TRAVIS ]] && skip qp set_file so2.ezfio - qp set_mo_class --core="[1-8]" --act="[9-87]" + qp set_mo_class --core="[1-8]" --act="[9-87]" run -41.5746738710350 -41.3800467740750 } @@ -177,7 +177,7 @@ function run() { [[ -n $TRAVIS ]] && skip qp set_file n2.ezfio qp set_mo_class --core="[1,2]" --act="[3-40]" --del="[41-60]" - run -109.275693633982 -108.757794570948 + run -109.275693633982 -108.757794570948 } @test "HCN" { # 133.8696s From a3db8bb242c16f822d7a112b6c043be69c7abb21 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Wed, 18 Oct 2023 09:08:37 +0200 Subject: [PATCH 336/337] Fix ezfio save --- src/ao_two_e_ints/providers_ao_erf.irp.f | 2 +- src/ao_two_e_ints/routines_save_integrals_erf.irp.f | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ao_two_e_ints/providers_ao_erf.irp.f b/src/ao_two_e_ints/providers_ao_erf.irp.f index 293df29f..ff8c31a2 100644 --- a/src/ao_two_e_ints/providers_ao_erf.irp.f +++ b/src/ao_two_e_ints/providers_ao_erf.irp.f @@ -90,7 +90,7 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_erf_in_map ] if (write_ao_two_e_integrals_erf) then call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map) - call ezfio_set_ao_two_e_erf_ints_io_ao_two_e_integrals_erf("Read") + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals_erf('Read') endif END_PROVIDER diff --git a/src/ao_two_e_ints/routines_save_integrals_erf.irp.f b/src/ao_two_e_ints/routines_save_integrals_erf.irp.f index 4b0cfad0..d980bc05 100644 --- a/src/ao_two_e_ints/routines_save_integrals_erf.irp.f +++ b/src/ao_two_e_ints/routines_save_integrals_erf.irp.f @@ -4,7 +4,7 @@ subroutine save_erf_two_e_integrals_ao PROVIDE ao_two_e_integrals_erf_in_map call ezfio_set_work_empty(.False.) call map_save_to_disk(trim(ezfio_filename)//'/work/ao_ints_erf',ao_integrals_erf_map) - call ezfio_set_ao_two_e_erf_ints_io_ao_two_e_integrals_erf('Read') + call ezfio_set_ao_two_e_ints_io_ao_two_e_integrals_erf('Read') end subroutine save_erf_two_e_ints_ao_into_ints_ao From 06b8370e42d24e29da1a82b9667ed48586e6b821 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Thu, 19 Oct 2023 17:51:17 +0200 Subject: [PATCH 337/337] Update irpf90 --- external/irpf90 | 2 +- src/mo_two_e_ints/mo_bi_integrals_erf.irp.f | 14 ++++++++------ src/mu_of_r/mu_of_r_conditions.irp.f | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/external/irpf90 b/external/irpf90 index 0007f72f..4ab1b175 160000 --- a/external/irpf90 +++ b/external/irpf90 @@ -1 +1 @@ -Subproject commit 0007f72f677fe7d61c5e1ed461882cb239517102 +Subproject commit 4ab1b175fc7ed0d96c1912f13dc53579b24157a6 diff --git a/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f b/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f index e7765d71..1afc1f3c 100644 --- a/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals_erf.irp.f @@ -9,25 +9,27 @@ subroutine mo_two_e_integrals_erf_index(i,j,k,l,i1) integer(key_kind) :: p,q,r,s,i2 p = min(i,k) r = max(i,k) - p = p+ishft(r*r-r,-1) + p = p+shiftr(r*r-r,1) q = min(j,l) s = max(j,l) - q = q+ishft(s*s-s,-1) + q = q+shiftr(s*s-s,1) i1 = min(p,q) i2 = max(p,q) - i1 = i1+ishft(i2*i2-i2,-1) + i1 = i1+shiftr(i2*i2-i2,1) end BEGIN_PROVIDER [ logical, mo_two_e_integrals_erf_in_map ] use map_module implicit none - integer(bit_kind) :: mask_ijkl(N_int,4) - integer(bit_kind) :: mask_ijk(N_int,3) - BEGIN_DOC ! If True, the map of MO two-electron integrals is provided END_DOC + integer(bit_kind) :: mask_ijkl(N_int,4) + integer(bit_kind) :: mask_ijk(N_int,3) + double precision :: cpu_1, cpu_2, wall_1, wall_2 + + PROVIDE mo_class real :: map_mb diff --git a/src/mu_of_r/mu_of_r_conditions.irp.f b/src/mu_of_r/mu_of_r_conditions.irp.f index f9c3b3b3..959950a6 100644 --- a/src/mu_of_r/mu_of_r_conditions.irp.f +++ b/src/mu_of_r/mu_of_r_conditions.irp.f @@ -128,7 +128,7 @@ BEGIN_PROVIDER [double precision, mu_average_prov, (N_states)] implicit none BEGIN_DOC - ! average value of mu(r) weighted with the total one-e density and divised by the number of electrons + ! average value of mu(r) weighted with the total one-e density and divided by the number of electrons ! ! !!!!!! WARNING !!!!!! if no_core_density == .True. then all contributions from the core orbitals !