From e710d2623765dce23896f576cb0b1a37b393a9f1 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 17 Mar 2023 19:23:07 +0100 Subject: [PATCH 01/16] Improve I/O on TC integrals --- src/non_h_ints_mu/grad_squared.irp.f | 70 ++++++--------- src/non_h_ints_mu/grad_squared_manu.irp.f | 76 +++++++--------- src/non_h_ints_mu/new_grad_tc.irp.f | 28 ++---- src/non_h_ints_mu/new_grad_tc_manu.irp.f | 103 ++++++++-------------- src/tc_keywords/EZFIO.cfg | 34 +++---- 5 files changed, 118 insertions(+), 193 deletions(-) diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 7925fa7c..1fd39f6a 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -2,7 +2,7 @@ ! --- ! TODO : strong optmization : write the loops in a different way -! : for each couple of AO, the gaussian product are done once for all +! : for each couple of AO, the gaussian product are done once for all BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ] @@ -20,14 +20,14 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 - ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 + ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 ! = v1^2 x int2_grad1u2_grad2u2_j1b2 ! + -0.5 x (grad_1 v1)^2 x int2_u2_j1b2 ! + -1.0 X V1 x (grad_1 v1) \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b ] ! ! 
END_DOC - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z, r(3), delta, coef @@ -100,7 +100,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi call wall_time(time1) print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -151,7 +151,7 @@ END_PROVIDER ! ! deallocate(ac_mat) ! -!END_PROVIDER +!END_PROVIDER ! --- @@ -214,12 +214,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_nu call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -267,7 +267,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -297,12 +297,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_ call wall_time(time1) print*, ' Wall time for u12sq_j1bsq = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -347,7 +347,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- @@ -370,26 +370,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="read") + read(11) tc_grad_square_ao close(11) else allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -401,11 +393,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -417,25 +409,25 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) - + !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & - !!$OMP PRIVATE (i, j, k, l) & + !!$OMP PRIVATE (i, j, k, l) & !!$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num) !!$OMP DO SCHEDULE (static) ! do j = 1, ao_num ! do l = 1, ao_num ! do i = 1, ao_num ! do k = 1, ao_num - ! 
tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + ! tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) ! enddo ! enddo ! enddo @@ -444,23 +436,17 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao !!$OMP END PARALLEL endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index cb9e15c4..66f3c693 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -17,29 +17,21 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="read") + read(11) tc_grad_square_ao_test close(11) else provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test - + allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP 
SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -51,11 +43,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -67,23 +59,23 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu enddo !$OMP END DO !$OMP END PARALLEL - + tc_grad_square_ao_test = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , 1.d0, tc_grad_square_ao_test, ao_num*ao_num) deallocate(tmp, b_mat) - + call sum_A_At(tc_grad_square_ao_test(1,1,1,1), ao_num*ao_num) !do i = 1, ao_num ! do j = 1, ao_num ! do k = i, ao_num - + ! do l = max(j,k), ao_num ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! end do - + ! !if (j.eq.k) then ! ! do l = j+1, ao_num ! ! tc_grad_square_ao_test(i,j,k,l) = 0.5d0 * (tc_grad_square_ao_test(i,j,k,l) + tc_grad_square_ao_test(k,l,i,j)) @@ -95,14 +87,14 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! ! tc_grad_square_ao_test(k,l,i,j) = tc_grad_square_ao_test(i,j,k,l) ! ! enddo ! !endif - + ! enddo ! enddo !enddo !tc_grad_square_ao_test = 2.d0 * tc_grad_square_ao_test ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! 
integer :: ii @@ -121,10 +113,10 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu ! print *, ' ii =', ii ! !$OMP END DO ! !$OMP END PARALLEL - + ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & - ! !$OMP PRIVATE (i, j, k, l) & + ! !$OMP PRIVATE (i, j, k, l) & ! !$OMP SHARED (tc_grad_square_ao_test, ao_num) ! !$OMP DO SCHEDULE (static) ! do j = 1, ao_num @@ -144,24 +136,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_square_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_square_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_square_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- @@ -189,7 +175,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & + !$OMP PRIVATE (i, k, ipoint) & !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num @@ -205,7 +191,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a tmp = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & + !$OMP PRIVATE (j, l, ipoint) & !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test) !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid @@ -226,7 +212,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_square_ao_test_ref, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -246,7 +232,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test_ref, (ao_num, ao_num, a call wall_time(time1) print*, ' Wall time for tc_grad_square_ao_test_ref = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -276,12 +262,12 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_f call wall_time(time1) print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: x, y, z @@ -328,12 +314,12 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao call wall_time(time1) print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ] - + implicit none integer :: ipoint, i, j, m, igauss double precision :: r(3), delta, coef @@ -381,7 +367,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi call wall_time(time1) print*, ' Wall time for grad12_j12_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index a15f690a..754e1240 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -36,16 +36,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") + read(11) int2_grad1_u12_ao close(11) else @@ -89,18 +81,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_ endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) 
+ write(11) int2_grad1_u12_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 47b05e52..901e3048 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -3,7 +3,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po BEGIN_DOC ! - ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) ! ! where r1 = r(ipoint) ! @@ -15,9 +15,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po ! if J(r1,r2) = u12 x v1 x v2 ! ! int2_grad1_u12_ao_test(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) ! ! @@ -35,25 +35,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po if(read_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="read") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - read(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="read") + read(11) int2_grad1_u12_ao_test close(11) + else - + if(j1b_type .eq. 
3) then do ipoint = 1, n_points_final_grid x = final_grid_points(1,ipoint) y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + z = final_grid_points(3,ipoint) tmp0 = 0.5d0 * v_1b(ipoint) tmp_x = v_1b_grad(1,ipoint) tmp_y = v_1b_grad(2,ipoint) @@ -87,24 +80,18 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='int2_grad1_u12_ao_test', action="write") - do m = 1, 3 - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do i = 1, ao_num - write(11) int2_grad1_u12_ao_test(i,j,ipoint,m) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! --- @@ -114,9 +101,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ ! ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) ! - ! This is obtained by integration by parts. + ! This is obtained by integration by parts. ! 
END_DOC @@ -131,40 +118,32 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ call wall_time(time0) if(read_tc_integ) then - - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="read") + read(11) tc_grad_and_lapl_ao_test close(11) else - provide int2_grad1_u12_ao_test - + provide int2_grad1_u12_ao_test + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) - + b_mat = 0.d0 !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & - !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & !$OMP ao_num, n_points_final_grid, final_weight_at_r_vector) !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid - + weight1 = 0.5d0 * final_weight_at_r_vector(ipoint) ao_i_r = aos_in_r_array_transp(ipoint,i) ao_k_r = aos_in_r_array_transp(ipoint,k) - + b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) @@ -173,19 +152,19 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + ac_mat = 0.d0 do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, 
n_points_final_grid, 1.d0 & , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , 1.d0, ac_mat, ao_num*ao_num) - + enddo deallocate(b_mat) - + !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l) & + !$OMP PRIVATE (i, j, k, l) & !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num) !$OMP DO SCHEDULE (static) do j = 1, ao_num @@ -199,29 +178,23 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_ enddo !$OMP END DO !$OMP END PARALLEL - + deallocate(ac_mat) endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao_test', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao_test(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_test', action="write") + call ezfio_set_work_empty(.False.) + write(11) tc_grad_and_lapl_ao_test close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0 -END_PROVIDER +END_PROVIDER ! 
--- diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..3f34c088 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -6,7 +6,7 @@ default: False [comp_left_eigv] type: logical -doc: If |true|, computes also the left-eigenvector +doc: If |true|, computes also the left-eigenvector interface: ezfio,provider,ocaml default: False @@ -14,7 +14,7 @@ default: False type: logical doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml -default: True +default: True [pure_three_body_h_tc] type: logical @@ -30,13 +30,13 @@ default: False [core_tc_op] type: logical -doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) +doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) interface: ezfio,provider,ocaml default: False [full_tc_h_solver] type: logical -doc: If |true|, you diagonalize the full TC H matrix +doc: If |true|, you diagonalize the full TC H matrix interface: ezfio,provider,ocaml default: False @@ -60,11 +60,11 @@ default: 0.000005 [thresh_psi_r_norm] type: logical -doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. +doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. 
interface: ezfio,provider,ocaml default: False -[state_following_tc] +[state_following_tc] type: logical doc: If |true|, the states are re-ordered to match the input states default: False @@ -78,7 +78,7 @@ default: True [symetric_fock_tc] type: logical -doc: If |true|, using F+F^t as Fock TC +doc: If |true|, using F+F^t as Fock TC interface: ezfio,provider,ocaml default: False @@ -126,7 +126,7 @@ default: 1.e-6 [maxovl_tc] type: logical -doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors +doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False @@ -152,7 +152,7 @@ default: 0. type: character*(32) doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS] interface: ezfio,provider,ocaml -default: Simple +default: DIIS [im_thresh_tcscf] type: Threshold @@ -180,21 +180,15 @@ default: 1.e-6 [var_tc] type: logical -doc: If |true|, use VAR-TC +doc: If |true|, use VAR-TC interface: ezfio,provider,ocaml default: False -[read_tc_integ] -type: logical -doc: If |true|, read integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao +[io_tc_integ] +type: Disk_access +doc: Read/Write integrals int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao from/to disk [ Write | Read | None ] interface: ezfio,provider,ocaml -default: False - -[write_tc_integ] -type: logical -doc: If |true|, write integrals: int2_grad1_u12_ao, tc_grad_square_ao and tc_grad_and_lapl_ao -interface: ezfio,provider,ocaml -default: False +default: None [debug_tc_pt2] type: integer From ba447be2e8627df7cbafba7e0ccbac23c2071552 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:12:02 +0100 Subject: [PATCH 02/16] added sort_wf --- src/tools/sort_wf.irp.f | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 src/tools/sort_wf.irp.f diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f new file mode 100644 index 00000000..2e0014ea --- 
/dev/null +++ b/src/tools/sort_wf.irp.f @@ -0,0 +1,7 @@ +program sort_wf + implicit none + read_wf = .true. + touch read_wf + call save_wavefunction + +end From 2ef2e8044d338c6c823d7f965f6a62a0c55640b8 Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 11:24:26 +0100 Subject: [PATCH 03/16] added sort_wf.irp.f --- external/qp2-dependencies | 2 +- src/tools/sort_wf.irp.f | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 2e0014ea..038b24c8 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -2,6 +2,19 @@ program sort_wf implicit none read_wf = .true. touch read_wf - call save_wavefunction + call routine + +end + +subroutine routine + implicit none + integer :: i + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.wf_sorted' + i_unit_output = getUnitAndOpen(output,'w') + do i = 1, N_det + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + enddo end From 7aee93997a9e98a6ebe8798a8e3a00ca10f1119d Mon Sep 17 00:00:00 2001 From: eginer Date: Thu, 23 Mar 2023 13:08:09 +0100 Subject: [PATCH 04/16] added Hn.py --- scripts/Hn.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 scripts/Hn.py diff --git a/scripts/Hn.py b/scripts/Hn.py new file mode 100644 index 00000000..a4119145 --- /dev/null +++ b/scripts/Hn.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import sys +from math import * +arg = sys.argv +#f = open('data_dft','r') +n = int(sys.argv[1]) +r = float(sys.argv[2]) +f = open('H'+str(n)+'_'+str(r),'w') +string=str(n)+"\n" +f.write(string) +string="\n" 
+f.write(string) +for i in range(n): + x = r * cos(2.* i* pi/n) + y = r * sin(2.* i* pi/n) + z = 0. + string="H "+str(x)+" "+str(y)+" "+str(z)+"\n" + f.write(string) + +#lines = f.readlines() +#cipsi_dft= [] +# +#dissoc = [] +#dissoc.append(float(-76.0179223470363)) +#dissoc.append(float(-76.0592367866993)) +#dissoc.append(float(-76.0678739715659)) +#delta_e = [] +# +#for line in lines: +# data = line.split() +# if(len(data)>0): +# dft=float(data[1]) +# fci=float(data[2]) +# e=fci+dft +# cipsi_dft.append(e) +# +#print(*cipsi_dft,sep=" & ") +# +#for i in 0,1,2: +# delta_e.append(1000.*(dissoc[i] - cipsi_dft[i])) +# +#print(*delta_e,sep=" & ") +# From 436b8815807c3ad04dc5035244c6ed9e7c4f2580 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 11:21:19 +0200 Subject: [PATCH 05/16] added thresh_cycle --- src/ao_many_one_e_ints/grad2_jmu_manu.irp.f | 48 ++-- .../grad_lapl_jmu_manu.irp.f | 46 ++-- src/ao_many_one_e_ints/listj1b_sorted.irp.f | 42 ++-- src/non_h_ints_mu/total_tc_int.irp.f | 19 ++ src/tc_keywords/EZFIO.cfg | 6 + src/tc_scf/tc_scf.irp.f | 4 +- src/tc_scf/test_int.irp.f | 214 +++++++++++------- src/tools/sort_wf.irp.f | 2 +- 8 files changed, 212 insertions(+), 169 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 8e253d75..14170ede 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -38,7 +38,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b, & - !$OMP ao_overlap_abs,sq_pi_3_2) + !$OMP ao_overlap_abs,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -46,7 +46,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, 
(ao_num, ao_n r(3) = final_grid_points(3,ipoint) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. thrsh_cycle_tc) then cycle endif @@ -58,7 +58,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_gauss = overlap_gauss_r12_ao(r, expo_fit, i, j) int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss enddo @@ -81,8 +81,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef -! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! expo_fit, i, j, int_fit_v, n_points_final_grid) int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) @@ -145,14 +144,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, big_array,& - !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs) + !$OMP ao_abs_comb_b3_j1b,ao_overlap_abs,thrsh_cycle_tc) ! allocate(int_fit_v(n_points_final_grid)) !$OMP DO SCHEDULE(dynamic) do i = 1, ao_num do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then + if(ao_overlap_abs(j,i) .lt. 
thrsh_cycle_tc) then cycle endif @@ -161,7 +160,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) -! if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -243,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & - !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) + !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -260,11 +258,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) coef_fit = coef_gauss_j_mu_x_2(i_fit) - if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*int_j1b*sq_pi_3_2*(expo_fit)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -278,7 +276,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -288,8 +286,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef_fit = coef_gauss_j_mu_x_2(i_fit) !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle +! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -350,7 +347,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid @@ -369,7 +366,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -392,8 +389,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) sq_alpha = alpha_1s_inv * 
dsqrt(alpha_1s_inv) -! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version - if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle +! if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. thrsh_cycle_tc) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -470,13 +466,13 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, ao_abs_comb_b3_j1b, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10) cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc) cycle r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -489,10 +485,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p ! --- --- --- int_j1b = ao_abs_comb_b3_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) - if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.1.d-15) cycle +! if(dabs(int_j1b)*dsqpi_3_2*expo_fit**(-1.5d0).lt.thrsh_cycle_tc) cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) tmp += coef_fit * int_fit @@ -507,7 +503,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p coef = List_comb_thr_b3_coef (i_1s,j,i) beta = List_comb_thr_b3_expo (i_1s,j,i) int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i) @@ -517,7 +513,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle +! if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.thrsh_cycle_tc)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit @@ -527,9 +523,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - if(expo_coef_1s .gt. 20.d0) cycle +! if(expo_coef_1s .gt. 20.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp) .lt. 1d-08) cycle +! if(dabs(coef_tmp) .lt. 
1d-08) cycle int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 5c9f81e9..66a2b961 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -31,7 +31,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b, & !$OMP v_ij_erf_rk_cst_mu_j1b_test, mu_erf, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid @@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 do i_1s = 1, List_comb_thr_b2_size(j,i) @@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -110,7 +110,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & !$OMP x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,thrsh_cycle_tc) ! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO do ipoint = 1, n_points_final_grid @@ -120,7 +120,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -130,19 +130,11 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle + ! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) -! if(ao_prod_center(1,j,i).ne.10000.d0)then -! ! approximate 1 - erf(mu r12) by a gaussian * 10 -! !DIR$ FORCEINLINE -! call gaussian_product(expo_erfc_mu_gauss,r, & -! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & -! factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle -! 
endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) @@ -216,7 +208,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -225,7 +217,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -234,11 +226,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle + ! if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) - if(ao_overlap_abs_grid(j,i).lt.1.d-15) cycle + ! if(ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc) cycle int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit enddo @@ -251,7 +243,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! 
if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -259,9 +251,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po expo_fit = expo_gauss_j_mu_x(i_fit) coef_fit = coef_gauss_j_mu_x(i_fit) coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle +! if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle +! if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo @@ -325,7 +317,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size, & !$OMP List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,thrsh_cycle_tc) !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -334,7 +326,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.thrsh_cycle_tc)cycle tmp = 0.d0 @@ -343,7 +335,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, ! --- --- --- int_j1b = ao_abs_comb_b2_j1b(1,j,i) - if(dabs(int_j1b).lt.1.d-10) cycle +! 
if(dabs(int_j1b).lt.thrsh_cycle_tc) cycle expo_fit = expo_good_j_mu_1gauss int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += int_fit @@ -356,7 +348,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, coef = List_comb_thr_b2_coef (i_1s,j,i) beta = List_comb_thr_b2_expo (i_1s,j,i) int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i) - if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle +! if(dabs(coef)*dabs(int_j1b).lt.thrsh_cycle_tc)cycle B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) @@ -364,9 +356,9 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, expo_fit = expo_good_j_mu_1gauss coef_fit = 1.d0 coeftot = coef * coef_fit - if(dabs(coeftot).lt.1.d-15)cycle + if(dabs(coeftot).lt.thrsh_cycle_tc)cycle call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u) - if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle + if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.thrsh_cycle_tc)cycle int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit ! enddo diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f index bf493fbb..9bcce449 100644 --- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f +++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f @@ -3,15 +3,16 @@ &BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size] implicit none integer :: i_1s,i,j,ipoint - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b2_size = 0 + print*,'List_all_comb_b2_size = ',List_all_comb_b2_size +! 
pause do i = 1, ao_num do j = i, ao_num do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-15)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) @@ -24,7 +25,7 @@ dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b2_size(j,i) += 1 endif enddo @@ -40,6 +41,7 @@ list(i) = maxval(List_comb_thr_b2_size(:,i)) enddo max_List_comb_thr_b2_size = maxval(list) + print*,'max_List_comb_thr_b2_size = ',max_List_comb_thr_b2_size END_PROVIDER @@ -49,16 +51,15 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b2_j1b = 10000000.d0 do i = 1, ao_num do j = i, ao_num icount = 0 do i_1s = 1, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) - if(dabs(coef).lt.1.d-12)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle beta = List_all_comb_b2_expo (i_1s) center(1:3) = List_all_comb_b2_cent(1:3,i_1s) int_j1b = 0.d0 @@ -70,7 +71,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 List_comb_thr_b2_coef(icount,j,i) = coef List_comb_thr_b2_expo(icount,j,i) = beta @@ -98,17 +99,17 @@ END_PROVIDER &BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size] implicit none integer :: i_1s,i,j,ipoint - 
double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 List_comb_thr_b3_size = 0 + print*,'List_all_comb_b3_size = ',List_all_comb_b3_size do i = 1, ao_num do j = 1, ao_num do i_1s = 1, List_all_comb_b3_size coef = List_all_comb_b3_coef (i_1s) beta = List_all_comb_b3_expo (i_1s) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -118,7 +119,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then List_comb_thr_b3_size(j,i) += 1 endif enddo @@ -144,9 +145,8 @@ END_PROVIDER &BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)] implicit none integer :: i_1s,i,j,ipoint,icount - double precision :: coef,beta,center(3),int_j1b,thr + double precision :: coef,beta,center(3),int_j1b double precision :: r(3),weight,dist - thr = 1.d-15 ao_abs_comb_b3_j1b = 10000000.d0 do i = 1, ao_num do j = 1, ao_num @@ -156,7 +156,7 @@ END_PROVIDER beta = List_all_comb_b3_expo (i_1s) beta = max(beta,1.d-12) center(1:3) = List_all_comb_b3_cent(1:3,i_1s) - if(dabs(coef).lt.thr)cycle + if(dabs(coef).lt.thrsh_cycle_tc)cycle int_j1b = 0.d0 do ipoint = 1, n_points_extra_final_grid r(1:3) = final_grid_points_extra(1:3,ipoint) @@ -166,7 +166,7 @@ END_PROVIDER dist += ( center(3) - r(3) )*( center(3) - r(3) ) int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight enddo - if(dabs(coef)*dabs(int_j1b).gt.thr)then + if(dabs(coef)*dabs(int_j1b).gt.thrsh_cycle_tc)then icount += 1 
List_comb_thr_b3_coef(icount,j,i) = coef List_comb_thr_b3_expo(icount,j,i) = beta @@ -177,15 +177,5 @@ END_PROVIDER enddo enddo -! do i = 1, ao_num -! do j = 1, i-1 -! do icount = 1, List_comb_thr_b3_size(j,i) -! List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j) -! List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j) -! List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j) -! enddo -! enddo -! enddo - END_PROVIDER diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f index 2fd2719c..4f8dc74d 100644 --- a/src/non_h_ints_mu/total_tc_int.irp.f +++ b/src/non_h_ints_mu/total_tc_int.irp.f @@ -68,7 +68,26 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + implicit none + integer :: i, j, k, l + double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' + call wall_time(wall0) + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) +! 
ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) + enddo + enddo + enddo + enddo + call wall_time(wall1) + print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 +END_PROVIDER BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index e397e700..62b6d2bf 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -46,6 +46,12 @@ doc: Thresholds on the energy for iterative Davidson used in TC interface: ezfio,provider,ocaml default: 1.e-5 +[thrsh_cycle_tc] +type: Threshold +doc: Thresholds to cycle the integrals with the envelop +interface: ezfio,provider,ocaml +default: 1.e-15 + [max_it_dav] type: integer doc: nb max of iteration in Davidson used in TC diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 85389f30..ae3b609b 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 30 - my_n_pt_a_grid = 50 + my_n_pt_r_grid = 60 + my_n_pt_a_grid = 110 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index 0866cdaf..b9287d58 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -21,25 +21,22 @@ program test_ints touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid !! OK -!call routine_int2_u_grad1u_j1b2 -!! OK -!call routine_v_ij_erf_rk_cst_mu_j1b -!! OK +! call routine_int2_u_grad1u_j1b2 +! OK +! call routine_v_ij_erf_rk_cst_mu_j1b +! OK ! call routine_x_v_ij_erf_rk_cst_mu_j1b -!! OK -! call routine_v_ij_u_cst_mu_j1b - -!! OK -!call routine_int2_u2_j1b2 - -!! OK -!call routine_int2_u_grad1u_x_j1b2 - -!! OK +! OK +! call routine_int2_u2_j1b2 +! OK +! call routine_int2_u_grad1u_x_j1b2 +! OK ! 
call routine_int2_grad1u2_grad2u2_j1b2 ! call routine_int2_u_grad1u_j1b2 ! call test_total_grad_lapl ! call test_total_grad_square +! call test_int2_grad1_u12_ao_test +! call routine_v_ij_u_cst_mu_j1b_test ! call test_ao_tc_int_chemist ! call test_grid_points_ao ! call test_tc_scf @@ -53,12 +50,12 @@ program test_ints !call test_two_e_tc_non_hermit_integral() - call test_tc_grad_square_ao_test() - - PROVIDE TC_HF_energy VARTC_HF_energy - print *, ' TC_HF_energy = ', TC_HF_energy - print *, ' VARTC_HF_energy = ', VARTC_HF_energy +! call test_tc_grad_square_ao_test() +!!PROVIDE TC_HF_energy VARTC_HF_energy +!!print *, ' TC_HF_energy = ', TC_HF_energy +!!print *, ' VARTC_HF_energy = ', VARTC_HF_energy + call test_old_ints end ! --- @@ -157,6 +154,9 @@ subroutine routine_int2_u_grad1u_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -169,20 +169,6 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -215,6 +201,9 @@ subroutine routine_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -228,20 +217,6 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -276,6 +251,10 @@ subroutine routine_x_v_ij_erf_rk_cst_mu_j1b enddo enddo enddo + + print*,'******' + print*,'******' + print*,'routine_x_v_ij_erf_rk_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -290,20 +269,6 @@ subroutine routine_v_ij_u_cst_mu_j1b_test integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) @@ -336,6 +301,9 @@ subroutine routine_v_ij_u_cst_mu_j1b_test enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b_test' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -427,19 +395,6 @@ subroutine routine_int2_u2_j1b2 integer :: i,j,ipoint,k,l double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -473,6 +428,9 @@ subroutine routine_int2_u2_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u2_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -486,19 +444,6 @@ subroutine routine_int2_u_grad1u_x_j1b2 integer :: i,j,ipoint,k,l,m double precision :: weight,accu_relat, accu_abs, contrib double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) -! print*,'ao_overlap_abs = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_overlap_abs(i,:) -! enddo -! print*,'center = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_center(2,i,:) -! enddo -! print*,'sigma = ' -! do i = 1, ao_num -! write(*,'(100(F10.5,X))')ao_prod_sigma(i,:) -! 
enddo - allocate(array(ao_num, ao_num, ao_num, ao_num)) array = 0.d0 @@ -534,6 +479,9 @@ subroutine routine_int2_u_grad1u_x_j1b2 enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_int2_u_grad1u_x_j1b2' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -579,6 +527,9 @@ subroutine routine_v_ij_u_cst_mu_j1b enddo enddo enddo + print*,'******' + print*,'******' + print*,'routine_v_ij_u_cst_mu_j1b' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -742,6 +693,9 @@ subroutine test_total_grad_lapl enddo enddo enddo + print*,'******' + print*,'******' + print*,' test_total_grad_lapl' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -767,6 +721,9 @@ subroutine test_total_grad_square enddo enddo enddo + print*,'******' + print*,'******' + print*,'test_total_grad_square' print*,'accu_abs = ',accu_abs/dble(ao_num)**4 print*,'accu_relat = ',accu_relat/dble(ao_num)**4 @@ -1057,3 +1014,86 @@ end ! --- + +subroutine test_old_ints + implicit none + integer :: i,j,k,l + double precision :: old, new, contrib, get_ao_tc_sym_two_e_pot + double precision :: integral_sym , integral_nsym,accu + PROVIDE ao_tc_sym_two_e_pot_in_map + accu = 0.d0 + do j = 1, ao_num + do l= 1, ao_num + do i = 1, ao_num + do k = 1, ao_num +! integral_sym = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map) + ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis +! integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) +! old = integral_sym + integral_nsym +! old = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + new = ao_tc_int_chemist_test(k,i,l,j) + old = ao_tc_int_chemist_no_cycle(k,i,l,j) + contrib = dabs(old - new) + if(contrib.gt.1.d-6)then + print*,'problem !!' 
+ print*,i,j,k,l + print*,old, new, contrib + endif + accu += contrib + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'in test_old_ints' + print*,'accu = ',accu/dble(ao_num**4) + +end + +subroutine test_int2_grad1_u12_ao_test + implicit none + integer :: i,j,ipoint,m,k,l + double precision :: weight,accu_relat, accu_abs, contrib + double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:) + allocate(array(ao_num, ao_num, ao_num, ao_num)) + array = 0.d0 + allocate(array_ref(ao_num, ao_num, ao_num, ao_num)) + array_ref = 0.d0 + do m = 1, 3 + do ipoint = 1, n_points_final_grid + weight = final_weight_at_r_vector(ipoint) + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + array(j,i,l,k) += int2_grad1_u12_ao_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + array_ref(j,i,l,k) += int2_grad1_u12_ao(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight + enddo + enddo + enddo + enddo + enddo + enddo + + accu_relat = 0.d0 + accu_abs = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do j = 1, ao_num + contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) + accu_abs += contrib + if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then + accu_relat += contrib/dabs(array_ref(j,i,l,k)) + endif + enddo + enddo + enddo + enddo + print*,'******' + print*,'******' + print*,'test_int2_grad1_u12_ao_test' + print*,'accu_abs = ',accu_abs/dble(ao_num)**4 + print*,'accu_relat = ',accu_relat/dble(ao_num)**4 +end diff --git a/src/tools/sort_wf.irp.f b/src/tools/sort_wf.irp.f index 038b24c8..95b1a964 100644 --- a/src/tools/sort_wf.irp.f +++ b/src/tools/sort_wf.irp.f @@ -14,7 +14,7 @@ subroutine routine output=trim(ezfio_filename)//'.wf_sorted' i_unit_output = getUnitAndOpen(output,'w') do i = 1, N_det - write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1)) + write(i_unit_output, *)i,dabs(psi_coef_sorted(i,1))/dabs(psi_coef_sorted(1,1)) enddo end 
From a38255cec989f639231d3254784787e725961be1 Mon Sep 17 00:00:00 2001 From: eginer Date: Tue, 28 Mar 2023 12:43:15 +0200 Subject: [PATCH 06/16] added Hn.py --- external/qp2-dependencies | 2 +- scripts/Hn.py | 2 +- scripts/get_fci_tc_conv.sh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) create mode 100755 scripts/get_fci_tc_conv.sh diff --git a/external/qp2-dependencies b/external/qp2-dependencies index 6e23ebac..f40bde09 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a +Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 diff --git a/scripts/Hn.py b/scripts/Hn.py index a4119145..0f938510 100644 --- a/scripts/Hn.py +++ b/scripts/Hn.py @@ -5,7 +5,7 @@ arg = sys.argv #f = open('data_dft','r') n = int(sys.argv[1]) r = float(sys.argv[2]) -f = open('H'+str(n)+'_'+str(r),'w') +f = open('H'+str(n)+'_'+str(r)+'.xyz','w') string=str(n)+"\n" f.write(string) string="\n" diff --git a/scripts/get_fci_tc_conv.sh b/scripts/get_fci_tc_conv.sh new file mode 100755 index 00000000..643f3ac0 --- /dev/null +++ b/scripts/get_fci_tc_conv.sh @@ -0,0 +1,2 @@ +file=$1 +grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" $file | cut -d "=" -f 2 > ${file}.conv_fci_tc From 7e617bee131ccd63c90394e26ae82acf454c166e Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 28 Mar 2023 13:24:28 +0200 Subject: [PATCH 07/16] COLLAPSE --- external/qp2-dependencies | 2 +- src/bi_ort_ints/three_body_ijmk.irp.f | 12 ++++++------ src/bi_ort_ints/three_body_ijmkl.irp.f | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..ce14f57b 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f 
b/src/bi_ort_ints/three_body_ijmk.irp.f index 853972f7..5afd49ab 100644 --- a/src/bi_ort_ints/three_body_ijmk.irp.f +++ b/src/bi_ort_ints/three_body_ijmk.irp.f @@ -27,7 +27,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -74,7 +74,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -121,7 +121,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -168,7 +168,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -214,7 +214,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -261,7 +261,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,integral) & !$OMP SHARED 
(mo_num,three_e_4_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index bd5c4977..ae4c9bd5 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -26,7 +26,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -75,7 +75,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -124,7 +124,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -173,7 +173,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -222,7 +222,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 
1, mo_num do k = 1, mo_num do j = 1, mo_num @@ -271,7 +271,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, !$OMP DEFAULT (NONE) & !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num From 6d7d7ccfb4420556a341dd454caaa759bb721927 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 29 Mar 2023 15:47:59 +0200 Subject: [PATCH 08/16] set the threshold_cycle to 1e-10 by default --- src/bi_ort_ints/total_twoe_pot.irp.f | 1 + src/tc_keywords/EZFIO.cfg | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 78047d1b..f5f5959a 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -57,6 +57,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n PROVIDE ao_tc_sym_two_e_pot_in_map + !!! 
TODO :: OPENMP do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 62b6d2bf..e65a1400 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -50,7 +50,7 @@ default: 1.e-5 type: Threshold doc: Thresholds to cycle the integrals with the envelop interface: ezfio,provider,ocaml -default: 1.e-15 +default: 1.e-10 [max_it_dav] type: integer From 504d46f6934e07a7e8ffb6152d3e43e01b9a963e Mon Sep 17 00:00:00 2001 From: eginer Date: Sat, 1 Apr 2023 18:41:47 +0200 Subject: [PATCH 09/16] added print_mos --- external/qp2-dependencies | 2 +- src/kohn_sham/print_mos.irp.f | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/kohn_sham/print_mos.irp.f diff --git a/external/qp2-dependencies b/external/qp2-dependencies index ce14f57b..6e23ebac 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit ce14f57b50511825a9fedb096749200779d3f4d4 +Subproject commit 6e23ebac001acae91d1c762ca934e09a9b7d614a diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f new file mode 100644 index 00000000..5e728444 --- /dev/null +++ b/src/kohn_sham/print_mos.irp.f @@ -0,0 +1,30 @@ +program print_mos + implicit none + integer :: i,nx + double precision :: r(3), xmax, dx, accu + double precision, allocatable :: mos_array(:) + double precision:: alpha,envelop + allocate(mos_array(mo_num)) + xmax = 5.d0 + nx = 1000 + dx=xmax/dble(nx) + r = 0.d0 + alpha = 0.5d0 + do i = 1, nx + call give_all_mos_at_r(r,mos_array) + accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 + accu = dsqrt(accu) + envelop = (1.d0 - dexp(-alpha * r(3)**2)) + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + r(3) += dx + enddo + +end + +double precision function f_mu(x) + implicit none + double precision, intent(in) :: x + + + +end From 450a80e3078d695c931386044ddcd1451b1eaacd Mon Sep 17 00:00:00 
2001 From: eginer Date: Mon, 3 Apr 2023 14:32:25 +0200 Subject: [PATCH 10/16] fixed stupid problem in pt_charges --- src/nuclei/write_pt_charges.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/nuclei/write_pt_charges.py b/src/nuclei/write_pt_charges.py index 6dbcd5b8..f5007090 100644 --- a/src/nuclei/write_pt_charges.py +++ b/src/nuclei/write_pt_charges.py @@ -21,7 +21,7 @@ def mv_in_ezfio(ezfio,tmp): os.system(cmdmv) -# Getting the EZFIO + ##Getting the EZFIO EZFIO=sys.argv[1] EZFIO=EZFIO.replace("/", "") print(EZFIO) @@ -66,8 +66,20 @@ zip_in_ezfio(EZFIO,tmp) tmp="pts_charge_coord" fcoord = open(tmp,'w') fcoord.write(" 2\n") -fcoord.write(" "+str(n_charges)+' 3\n') -#fcoord.write(" "+' 3 '+str(n_charges)+' \n') +if(n_charges < 10): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <100000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <1000000): + fcoord.write(" "+str(n_charges)+' 3\n') +elif(n_charges <10000000): + fcoord.write(" "+str(n_charges)+' 3\n') for i in range(n_charges): fcoord.write(' '+coord_x[i]+'\n') for i in range(n_charges): From 04715abc640010ce15e5096b504baa5425bb36ab Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:12:32 +0200 Subject: [PATCH 11/16] beginning to work on s2 for TC --- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 739 ++++++++++++++++++ .../{u0_h_u0.irp.f => h_tc_u0.irp.f} | 3 - src/tc_bi_ortho/tc_bi_ortho.irp.f | 1 - src/tc_bi_ortho/tc_h_eigvectors.irp.f | 2 - src/tc_scf/tc_scf.irp.f | 4 +- 5 files changed, 741 insertions(+), 8 deletions(-) create mode 100644 src/tc_bi_ortho/h_tc_s2_u0.irp.f rename src/tc_bi_ortho/{u0_h_u0.irp.f => h_tc_u0.irp.f} (99%) diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f new file mode 100644 index 
00000000..5a9f5e69 --- /dev/null +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -0,0 +1,739 @@ +subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + +subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) +end + + +subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_0 = H | u_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) + logical, intent(in) :: do_right + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) + do k=1,N_st + call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) + enddo + v_t = 0.d0 + s_t = 0.d0 + call dtranspose( & + u_0, & + size(u_0, 1), & + u_t, & + size(u_t, 1), & + N_det, N_st) + + call H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,1,N_det,0,1, do_right) + deallocate(u_t) + + call dtranspose( & + v_t, & + size(v_t, 1), & + v_0, & + size(v_0, 1), & + N_st, N_det) + call dtranspose( & + s_t, & + size(s_t, 1), & + s_0, & + size(s_0, 1), & + N_st, N_det) + deallocate(v_t,s_t) + + do k=1,N_st + call dset_order(v_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(s_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + call dset_order(u_0(1,k),psi_bilinear_matrix_order_reverse,N_det) + enddo + +end + + +subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + + PROVIDE ref_bitmask_energy N_int + + select case (N_int) + case (1) + call H_tc_s2_u_0_nstates_openmp_work_1(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (2) + call H_tc_s2_u_0_nstates_openmp_work_2(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (3) + call H_tc_s2_u_0_nstates_openmp_work_3(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case (4) + call H_tc_s2_u_0_nstates_openmp_work_4(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + case default + call H_tc_s2_u_0_nstates_openmp_work_N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + end select +end +BEGIN_TEMPLATE + +subroutine H_tc_s2_u_0_nstates_openmp_work_$N_int(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $v_t = H | u_t \\rangle$ and $s_t = S^2 | u_t\\rangle$ + ! + ! Default should be 1,N_det,0,1 + ! + ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> + END_DOC + integer, intent(in) :: N_st,sze,istart,iend,ishift,istep + double precision, intent(in) :: u_t(N_st,N_det) + logical, intent(in) :: do_right + double precision, intent(out) :: v_t(N_st,sze), s_t(N_st,sze) + + double precision :: hij, sij + integer :: i,j,k,l,kk + integer :: k_a, k_b, l_a, l_b, m_a, m_b + integer :: istate + integer :: krow, kcol, krow_b, kcol_b + integer :: lrow, lcol + integer :: mrow, mcol + integer(bit_kind) :: spindet($N_int) + integer(bit_kind) :: tmp_det($N_int,2) + integer(bit_kind) :: tmp_det2($N_int,2) + integer(bit_kind) :: tmp_det3($N_int,2) + integer(bit_kind), allocatable :: buffer(:,:) + integer :: n_doubles + integer, allocatable :: doubles(:) + integer, allocatable :: singles_a(:) + integer, allocatable :: singles_b(:) + integer, allocatable :: idx(:), idx0(:) + integer :: maxab, n_singles_a, n_singles_b, kcol_prev + integer*8 :: k8 + logical :: compute_singles + integer*8 :: last_found, left, right, right_max + double precision :: rss, mem, ratio + double precision, allocatable :: utl(:,:) + integer, parameter :: block_size=128 + logical :: u_is_sparse + +! call resident_memory(rss) +! mem = dble(singles_beta_csc_size) / 1024.d0**3 +! +! compute_singles = (mem+rss > qp_max_mem) +! +! if (.not.compute_singles) then +! provide singles_beta_csc +! endif +compute_singles=.True. + + + maxab = max(N_det_alpha_unique, N_det_beta_unique)+1 + allocate(idx0(maxab)) + + do i=1,maxab + idx0(i) = i + enddo + + ! Prepare the array of all alpha single excitations + ! 
------------------------------------------------- + + PROVIDE N_int nthreads_davidson + !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson) & + !$OMP SHARED(psi_bilinear_matrix_rows, N_det, & + !$OMP psi_bilinear_matrix_columns, & + !$OMP psi_det_alpha_unique, psi_det_beta_unique, & + !$OMP n_det_alpha_unique, n_det_beta_unique, N_int, & + !$OMP psi_bilinear_matrix_transp_rows, & + !$OMP psi_bilinear_matrix_transp_columns, & + !$OMP psi_bilinear_matrix_transp_order, N_st, & + !$OMP psi_bilinear_matrix_order_transp_reverse, & + !$OMP psi_bilinear_matrix_columns_loc, & + !$OMP psi_bilinear_matrix_transp_rows_loc, & + !$OMP istart, iend, istep, irp_here, v_t, s_t, & + !$OMP ishift, idx0, u_t, maxab, compute_singles, & + !$OMP singles_alpha_csc,singles_alpha_csc_idx, & + !$OMP singles_beta_csc,singles_beta_csc_idx) & + !$OMP PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i, & + !$OMP lcol, lrow, l_a, l_b, utl, kk, u_is_sparse, & + !$OMP buffer, doubles, n_doubles, umax, & + !$OMP tmp_det2, hij, sij, idx, l, kcol_prev,hmono, htwoe, hthree, & + !$OMP singles_a, n_singles_a, singles_b, ratio, & + !$OMP n_singles_b, k8, last_found,left,right,right_max) + + ! Alpha/Beta double excitations + ! ============================= + + allocate( buffer($N_int,maxab), & + singles_a(maxab), & + singles_b(maxab), & + doubles(maxab), & + idx(maxab), utl(N_st,block_size)) + + kcol_prev=-1 + + ! Check if u has multiple zeros + kk=1 ! Avoid division by zero + !$OMP DO + do k=1,N_det + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k))) + enddo + if (umax < 1.d-20) then + !$OMP ATOMIC + kk = kk+1 + endif + enddo + !$OMP END DO + u_is_sparse = N_det / kk < 20 ! 5% + + ASSERT (iend <= N_det) + ASSERT (istart > 0) + ASSERT (istep > 0) + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep ! Loop over all determinants (/!\ not in psidet order) + + krow = psi_bilinear_matrix_rows(k_a) ! 
Index of alpha part of determinant k_a + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) ! Index of beta part of determinant k_a + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + + if (kcol /= kcol_prev) then + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + if (compute_singles) then + call get_all_spin_singles_$N_int( & + psi_det_beta_unique, idx0, & + tmp_det(1,2), N_det_beta_unique, & + singles_b, n_singles_b) + else + n_singles_b = 0 + !DIR$ LOOP COUNT avg(1000) + do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1 + n_singles_b = n_singles_b+1 + singles_b(n_singles_b) = singles_beta_csc(k8) + enddo + endif + endif + kcol_prev = kcol + + ! -> Here, tmp_det is determinant k_a + + ! Loop over singly excited beta columns + ! ------------------------------------- + + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b + lcol = singles_b(i) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol) + + ! tmp_det2 is a single excitation of tmp_det in the beta spin + ! the alpha part is not defined yet + +!--- +! if (compute_singles) then + + l_a = psi_bilinear_matrix_columns_loc(lcol) + ASSERT (l_a <= N_det) + ! rows : | 1 2 3 4 | 1 3 4 6 | .... | 1 2 4 5 | + ! cols : | 1 1 1 1 | 2 2 2 2 | .... | 8 8 8 8 | + ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 | + ! ^ ^ + ! | | + ! l_a N_det + ! l_a is the index in the big vector os size Ndet of the position of the first element of column lcol + + ! Below we identify all the determinants with the same beta part + + !DIR$ UNROLL(8) + !DIR$ LOOP COUNT avg(50000) + do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow) ! hot spot + + ASSERT (l_a <= N_det) + idx(j) = l_a + l_a = l_a+1 + enddo + j = j-1 + + ! 
Get all single excitations from tmp_det(1,1) to buffer(1,?) + + call get_all_spin_singles_$N_int( & + buffer, idx, tmp_det(1,1), j, & + singles_a, n_singles_a ) + + ! Loop over alpha singles + ! ----------------------- + + double precision :: umax + + !DIR$ LOOP COUNT avg(1000) + do k = 1,n_singles_a,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (k+kk > n_singles_a) exit + l_a = singles_a(k+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + call get_s2(tmp_det,tmp_det2,$N_int,sij) + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + s_t(l,k_a) = s_t(l,k_a) + sij * utl(l,kk+1) + enddo + enddo + enddo + + enddo + + enddo + !$OMP END DO + + !$OMP DO SCHEDULE(guided,64) + do k_a=istart+ishift,iend,istep + + + ! Single and double alpha excitations + ! =================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! 
----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + ! Initial determinant is at k_b in beta-major representation + ! ---------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + spindet(1:$N_int) = tmp_det(1:$N_int,1) + + ! Loop inside the beta column to gather all the connected alphas + lcol = psi_bilinear_matrix_columns(k_a) + l_a = psi_bilinear_matrix_columns_loc(lcol) + + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_alpha_unique + if (l_a > N_det) exit + lcol = psi_bilinear_matrix_columns(l_a) + if (lcol /= kcol) exit + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot + idx(i) = l_a + l_a = l_a+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_a, doubles, n_singles_a, n_doubles ) + + ! Compute Hij for all alpha singles + ! ---------------------------------- + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_a,block_size + umax = 0.d0 + ! 
Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_a) exit + l_a = singles_a(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow) +! call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Compute Hij for all alpha doubles + ! ---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + ! Prefetch u_t(:,l_a) + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_a = doubles(i+kk) + lrow = psi_bilinear_matrix_rows(l_a) + ASSERT (lrow <= N_det_alpha_unique) + + tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! 
call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Single and double beta excitations + ! ================================== + + + ! Initial determinant is at k_a in alpha-major representation + ! ----------------------------------------------------------------------- + + krow = psi_bilinear_matrix_rows(k_a) + kcol = psi_bilinear_matrix_columns(k_a) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + spindet(1:$N_int) = tmp_det(1:$N_int,2) + + ! Initial determinant is at k_b in beta-major representation + ! ----------------------------------------------------------------------- + + k_b = psi_bilinear_matrix_order_transp_reverse(k_a) + ASSERT (k_b <= N_det) + + ! Loop inside the alpha row to gather all the connected betas + lrow = psi_bilinear_matrix_transp_rows(k_b) + l_b = psi_bilinear_matrix_transp_rows_loc(lrow) + !DIR$ LOOP COUNT avg(200000) + do i=1,N_det_beta_unique + if (l_b > N_det) exit + lrow = psi_bilinear_matrix_transp_rows(l_b) + if (lrow /= krow) exit + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol) + idx(i) = l_b + l_b = l_b+1 + enddo + i = i-1 + + call get_all_spin_singles_and_doubles_$N_int( & + buffer, idx, spindet, i, & + singles_b, doubles, n_singles_b, n_doubles ) + + ! Compute Hij for all beta singles + ! 
---------------------------------- + + tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + !DIR$ LOOP COUNT avg(1000) + do i=1,n_singles_b,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_singles_b) exit + l_b = singles_b(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol) +! call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + ! Compute Hij for all beta doubles + ! 
---------------------------------- + + !DIR$ LOOP COUNT avg(50000) + do i=1,n_doubles,block_size + umax = 0.d0 + if (u_is_sparse) then + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + do l=1,N_st + utl(l,kk+1) = u_t(l,l_a) + umax = max(umax, dabs(utl(l,kk+1))) + enddo + enddo + else + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + ASSERT (l_b <= N_det) + ASSERT (l_a <= N_det) + utl(:,kk+1) = u_t(:,l_a) + enddo + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + do kk=0,block_size-1 + if (i+kk > n_doubles) exit + l_b = doubles(i+kk) + l_a = psi_bilinear_matrix_transp_order(l_b) + lcol = psi_bilinear_matrix_transp_columns(l_b) + ASSERT (lcol <= N_det_beta_unique) + + tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) +! call i_H_j( tmp_det, tmp_det2, $N_int, hij) +! call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij) + if(do_right)then + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij) + else + call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij) + endif + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1) + enddo + enddo + enddo + + + ! Diagonal contribution + ! ===================== + + + ! Initial determinant is at k_a in alpha-major representation + ! 
----------------------------------------------------------------------- + + if (u_is_sparse) then + umax = 0.d0 + do l=1,N_st + umax = max(umax, dabs(u_t(l,k_a))) + enddo + else + umax = 1.d0 + endif + if (umax < 1.d-20) cycle + + krow = psi_bilinear_matrix_rows(k_a) + ASSERT (krow <= N_det_alpha_unique) + + kcol = psi_bilinear_matrix_columns(k_a) + ASSERT (kcol <= N_det_beta_unique) + + tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow) + tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol) + + double precision, external :: diag_H_mat_elem + double precision :: hmono, htwoe, hthree + +! hij = diag_H_mat_elem(tmp_det,$N_int) + call diag_htilde_mu_mat_fock_bi_ortho ($N_int, tmp_det, hmono, htwoe, hthree, hij) + call get_s2(tmp_det,tmp_det,$N_int,sij) + + !DIR$ LOOP COUNT AVG(4) + do l=1,N_st + v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a) + s_t(l,k_a) = s_t(l,k_a) + sij * u_t(l,k_a) + enddo + + end do + !$OMP END DO + deallocate(buffer, singles_a, singles_b, doubles, idx, utl) + !$OMP END PARALLEL + +end + +SUBST [ N_int ] + +1;; +2;; +3;; +4;; +N_int;; + +END_TEMPLATE + + diff --git a/src/tc_bi_ortho/u0_h_u0.irp.f b/src/tc_bi_ortho/h_tc_u0.irp.f similarity index 99% rename from src/tc_bi_ortho/u0_h_u0.irp.f rename to src/tc_bi_ortho/h_tc_u0.irp.f index e107ad88..5e6150ea 100644 --- a/src/tc_bi_ortho/u0_h_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_u0.irp.f @@ -93,9 +93,6 @@ subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right) double precision, allocatable :: u_t(:,:), v_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t allocate(u_t(N_st,N_det),v_t(N_st,N_det)) -! provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e -! provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell -! 
provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) enddo diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index 2d51f6f0..bd0b1ef5 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -3,7 +3,6 @@ program tc_bi_ortho BEGIN_DOC ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. END_DOC - print *, 'Hello world' my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index f2cbb637..11a14b41 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -25,8 +25,6 @@ subroutine diagonalize_CI_tc psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j) enddo enddo -! psi_energy(1:N_states) = CI_electronic_energy(1:N_states) -! psi_s2(1:N_states) = CI_s2(1:N_states) SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho end diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index ae3b609b..85389f30 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -11,8 +11,8 @@ program tc_scf print *, ' starting ...' my_grid_becke = .True. - my_n_pt_r_grid = 60 - my_n_pt_a_grid = 110 + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! 
small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid From 042159a13497dc73129e8360cd6b47611acb5c4f Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 16:50:55 +0200 Subject: [PATCH 12/16] added h_p in davidson diagonalization hS2 --- src/davidson/diagonalization_hs2_dressed.irp.f | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index d37b7386..8117f320 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,7 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) - y = h +! y = h + y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & From fbe8c4b60f349322ab8deb2b34007c585ed5587b Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 17:53:09 +0200 Subject: [PATCH 13/16] working on S2 for TC: davidson with S2 penalty seems to work --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 547 ++++++++++++++++++++++++++++++ src/tc_bi_ortho/test_s2_tc.irp.f | 157 +++++++++ 2 files changed, 704 insertions(+) create mode 100644 src/tc_bi_ortho/dav_h_tc_s2.irp.f create mode 100644 src/tc_bi_ortho/test_s2_tc.irp.f diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f new file mode 100644 index 00000000..02aa712b --- /dev/null +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -0,0 +1,547 @@ + +! --- + +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) + + use mmap_module + + BEGIN_DOC + ! Generic modified-Davidson diagonalization + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors + ! + ! sze : Number of determinants + ! + ! 
N_st : Number of eigenstates + ! + ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st + ! + ! Initial guess vectors are not necessarily orthonormal + ! + ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + END_DOC + + implicit none + + integer, intent(in) :: sze, N_st, N_st_diag_in + double precision, intent(in) :: H_jj(sze) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(sze,N_st_diag_in) + double precision, intent(out) :: energies(N_st) + external hcalc + + character*(16384) :: write_buffer + integer :: iter, N_st_diag + integer :: i, j, k, l, m + integer :: iter2, itertot + logical :: disk_based + integer :: shift, shift2, itermax + integer :: nproc_target + integer :: order(N_st_diag_in) + double precision :: to_print(3,N_st) + double precision :: r1, r2, alpha + double precision :: cpu, wall + double precision :: cmax + double precision :: energy_shift(N_st_diag_in*davidson_sze_max) + double precision, allocatable :: U(:,:) + double precision, allocatable :: y(:,:), h(:,:), lambda(:), h_p(:,:), s2(:) + real, allocatable :: y_s(:,:) + double precision, allocatable :: s_(:,:), s_tmp(:,:) + double precision, allocatable :: residual_norm(:) + + double precision :: lambda_tmp + integer, allocatable :: i_omax(:) + double precision, allocatable :: U_tmp(:), overlap(:), S_d(:,:) + + double precision, allocatable :: W(:,:) + real, pointer :: S(:,:) + + !double precision, pointer :: W(:,:) + double precision, external :: u_dot_v, u_dot_u + + + include 'constants.include.F' + + N_st_diag = N_st_diag_in +! 
print*,'trial vector' + do i = 1, sze + if(isnan(u_in(i,1)))then + print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space' + print*,i,u_in(i,1) + stop + else if (dabs(u_in(i,1)).lt.1.d-16)then + u_in(i,1) = 0.d0 + endif + enddo + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, y_s, S_d, h, lambda + + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + + provide threshold_nonsym_davidson + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + double precision :: rss + integer :: maxab + maxab = sze + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.5d0*dble(sze*m)*(N_st_diag*itermax) &! W, S + + 4.5d0*(N_st_diag*itermax)**2 &! h,y,y_s,s_, s_tmp + + 2.d0*(N_st_diag*itermax) &! s2,lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_S2_u_0_nstates_zmq + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on collector + + 3.d0*(N_st_diag*N_det) &! u_t, v_t, s_t on slave + + 0.5d0*maxab &! idx0 in H_S2_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 +! else if (m==1.and.disk_based_davidson) then +! m = 0 +! disk_based = .True. +! 
itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of basis functions') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i=1,N_st + write_buffer = trim(write_buffer)//' Energy S^2 Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i=1,N_st + write_buffer = trim(write_buffer)//' ================ =========== ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + ! --- + + + allocate( W(sze,N_st_diag*itermax), S(sze,N_st_diag*itermax) ) + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + S_d(sze,N_st_diag), & + + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + h_p(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + lambda(N_st_diag*itermax), & + residual_norm(N_st_diag), & + i_omax(N_st), & + s2(N_st_diag*itermax), & + y_s(N_st_diag*itermax,N_st_diag*itermax) & + ) + + U = 0.d0 + h = 0.d0 + y = 0.d0 + s_ = 0.d0 + s_tmp = 0.d0 + + lambda = 0.d0 + residual_norm = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + + ! Davidson iterations + ! =================== + + converged = .False. + + ! Initialize from N_st to N_st_diag with gaussian random numbers + ! 
to be sure to have overlap with any eigenvectors + do k = N_st+1, N_st_diag + u_in(k,k) = 10.d0 + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) + enddo + enddo + ! Normalize all states + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + ! Copy from the guess input "u_in" to the working vectors "U" + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + ! --- + + itertot = 0 + + do while (.not.converged) + + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag * (iter-1) + shift2 = N_st_diag * iter + + if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! W = H U +! call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + call hcalc(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze) + S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag)) + else + + ! Already computed in update below + continue + endif + ! Compute s_kl = = + ! ------------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j,k) COLLAPSE(2) + do j=1,shift2 + do i=1,shift2 + s_(i,j) = 0.d0 + do k=1,sze + s_(i,j) = s_(i,j) + U(k,i) * dble(S(k,j)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + + ! Compute h_kl = = + ! ------------------------------------------- + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1) ) + ! Penalty method + ! -------------- + + if (s2_eig) then + h_p = s_ + do k=1,shift2 + h_p(k,k) = h_p(k,k) - expected_s2 + enddo + if (only_expected_s2) then + alpha = 0.1d0 + h_p = h + alpha*h_p + else + alpha = 0.0001d0 + h_p = h + alpha*h_p + endif + else + h_p = h + alpha = 0.d0 + endif + + ! Diagonalize h y = lambda y + ! 
--------------------------- + call diag_nonsym_right(shift2, h_p(1,1), size(h_p, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + do k = 1, N_st_diag +! print*,'lambda(k) before = ',lambda(k) + lambda(k) = 0.d0 + do l = 1, shift2 + do m = 1, shift2 + lambda(k) += y(m,k) * h(m,l) * y(l,k) + enddo + enddo +! print*,'lambda(k) new = ',lambda(k) + enddo + ! Compute S2 for each eigenvector + ! ------------------------------- + + call dgemm('N','N',shift2,shift2,shift2, & + 1.d0, s_, size(s_,1), y, size(y,1), & + 0.d0, s_tmp, size(s_tmp,1)) + + call dgemm('T','N',shift2,shift2,shift2, & + 1.d0, y, size(y,1), s_tmp, size(s_tmp,1), & + 0.d0, s_, size(s_,1)) + + do k=1,shift2 + s2(k) = s_(k,k) + enddo + + ! Express eigenvectors of h in the determinant basis: + ! --------------------------------------------------- + + ! y(:,k) = rk + ! U(:,k) = Bk + ! U(:,shift2+k) = Rk = Bk x rk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1) ) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + ! --- + ! select the max overlap + + ! + ! start test ------------------------------------------------------------------------ + ! + !double precision, allocatable :: Utest(:,:), Otest(:) + !allocate( Utest(sze,shift2), Otest(shift2) ) + + !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 & + ! , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) ) + !do k = 1, shift2 + ! call normalize(Utest(1,k), sze) + !enddo + !do j = 1, sze + ! write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2) + !enddo + + !do k = 1, shift2 + ! Otest(k) = 0.d0 + ! do i = 1, sze + ! Otest(k) += Utest(i,k) * u_in(i,1) + ! enddo + ! Otest(k) = dabs(Otest(k)) + ! print *, ' Otest =', k, Otest(k), lambda(k) + !enddo + + !deallocate(Utest, Otest) + ! + ! end test ------------------------------------------------------------------------ + ! + + ! TODO + ! 
state_following is more efficient + do l = 1, N_st + + allocate( overlap(N_st_diag) ) + + do k = 1, N_st_diag + overlap(k) = 0.d0 + do i = 1, sze + overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l) + enddo + overlap(k) = dabs(overlap(k)) + !print *, ' overlap =', k, overlap(k) + enddo + + lambda_tmp = 0.d0 + do k = 1, N_st_diag + if(overlap(k) .gt. lambda_tmp) then + i_omax(l) = k + lambda_tmp = overlap(k) + endif + enddo + + deallocate(overlap) + + if(lambda_tmp .lt. 0.7d0) then + print *, ' very small overlap ...', l, i_omax(l) + print *, ' max overlap = ', lambda_tmp + stop + endif + + if(i_omax(l) .ne. l) then + print *, ' !!! WARNONG !!!' + print *, ' index of state', l, i_omax(l) + endif + enddo + + ! y(:,k) = rk + ! W(:,k) = H x Bk + ! W(:,shift2+k) = H x Bk x rk + ! = Wk + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W, 1) ) + + ! --- + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + if(k <= N_st) then + l = k + residual_norm(k) = u_dot_u(U(1,shift2+l), sze) + to_print(1,k) = lambda(l) + to_print(2,k) = s2(l) + to_print(3,k) = residual_norm(l) + endif + enddo + !$OMP END PARALLEL DO + !residual_norm(1) = u_dot_u(U(1,shift2+1), sze) + !to_print(1,1) = lambda(1) + !to_print(2,1) = residual_norm(1) + + + if( (itertot > 1) .and. (iter == 1) ) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:3,1:N_st) + endif + + ! 
Check convergence + if(iter > 1) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + + do k = 1, N_st + if(residual_norm(k) > 1.e8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + enddo ! loop over iter + + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1) ) + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + call ortho_qr(U, size(U, 1), sze, N_st_diag) + call ortho_qr(U, size(U, 1), sze, N_st_diag) + do j = 1, N_st_diag + k = 1 + do while( (k < sze) .and. (U(k,j) == 0.d0) ) + k = k+1 + enddo + if(U(k,j) * u_in(k,j) < 0.d0) then + do i = 1, sze + W(i,j) = -W(i,j) + enddo + endif + enddo + + enddo ! loop over while + + ! --- + + do k = 1, N_st + energies(k) = lambda(k) + enddo + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + deallocate(W) + deallocate(U, h, y, lambda, residual_norm, i_omax) + + FREE nthreads_davidson + +end subroutine davidson_general_ext_rout_nonsym_b1space + +! --- diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f new file mode 100644 index 00000000..a5241fe3 --- /dev/null +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -0,0 +1,157 @@ +program test_tc + implicit none + read_wf = .True. + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. 
+ touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + call routine_test_s2 + call routine_test_s2_davidson +end + +subroutine routine_test_s2 + implicit none + logical :: do_right + integer :: sze ,i, N_st, j + double precision :: sij, accu_e, accu_s, accu_e_0, accu_s_0 + double precision, allocatable :: v_0_ref(:,:),u_0(:,:),s_0_ref(:,:) + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + sze = N_det + N_st = 1 + allocate(v_0_ref(N_det,1),u_0(N_det,1),s_0_ref(N_det,1),s_0_new(N_det,1),v_0_new(N_det,1)) + print*,'Checking first the Left ' + do_right = .False. + do i = 1, sze + u_0(i,1) = psi_l_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_r_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + print*,'Checking then the right ' + do_right = .True. 
+ do i = 1, sze + u_0(i,1) = psi_r_coef_bi_ortho(i,1) + enddo + call H_tc_u_0_nstates_openmp(v_0_ref,u_0,N_st,sze, do_right) + s_0_ref = 0.d0 + do i = 1, sze + do j = 1, sze + call get_s2(psi_det(1,1,i),psi_det(1,1,j),N_int,sij) + s_0_ref(i,1) += u_0(j,1) * sij + enddo + enddo + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,u_0,N_st,sze, do_right) + accu_e = 0.d0 + accu_s = 0.d0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_s_0 += s_0_ref(i,1) * psi_l_coef_bi_ortho(i,1) + accu_e += dabs(v_0_ref(i,1) - v_0_new(i,1)) + accu_s += dabs(s_0_ref(i,1) - s_0_new(i,1)) + enddo + print*,'accu_e = ',accu_e + print*,'accu_s = ',accu_s + print*,'accu_e_0 = ',accu_e_0 + print*,'accu_s_0 = ',accu_s_0 + + +end + +subroutine routine_test_s2_davidson + implicit none + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + integer :: i,istate + logical :: converged + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + do i = 1, N_det + call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + enddo + ! Preparing the left-eigenvector + print*,'Computing the left-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + print*,'energies = ',energies + double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) + integer :: sze,N_st + logical :: do_right + sze = N_det + N_st = 1 + do_right = .False. 
+ allocate(s_0_new(N_det,1),v_0_new(N_det,1)) + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + double precision :: accu_e_0, accu_s_0 + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + + ! Preparing the right-eigenvector + print*,'Computing the right-eigenvector ' + vec_tmp = 0.d0 + do istate = 1, N_states + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + enddo + do istate = N_states+1, n_states_diag + vec_tmp(istate,istate) = 1.d0 + enddo + do istate = 1, N_states + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + enddo + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + print*,'energies = ',energies + sze = N_det + N_st = 1 + do_right = .True. + v_0_new = 0.d0 + s_0_new = 0.d0 + call H_tc_s2_u_0_nstates_openmp(v_0_new,s_0_new,vec_tmp,N_st,sze, do_right) + accu_e_0 = 0.d0 + accu_s_0 = 0.d0 + do i = 1, sze + accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) + accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) + enddo + print*,'accu_e_0',accu_e_0 + print*,'accu_s_0',accu_s_0 + +end From 367abb3d70a452eec981febbd1a5999f91be9bd7 Mon Sep 17 00:00:00 2001 From: eginer Date: Mon, 10 Apr 2023 19:37:54 +0200 Subject: [PATCH 14/16] S2 OK in TC --- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 4 +- src/tc_bi_ortho/h_tc_s2_u0.irp.f | 30 ++++ src/tc_bi_ortho/tc_h_eigvectors.irp.f | 201 +++++++++++++++++++------- src/tc_bi_ortho/test_s2_tc.irp.f | 14 +- 4 files changed, 189 insertions(+), 60 deletions(-) diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index 02aa712b..c0ea054a 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! 
--- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) use mmap_module @@ -30,6 +30,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) double precision, intent(out) :: energies(N_st) + double precision, intent(inout) :: s2_out(N_st) external hcalc character*(16384) :: write_buffer @@ -528,6 +529,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_dia do k = 1, N_st energies(k) = lambda(k) + s2_out(k) = s2(k) enddo write_buffer = '=====' do i = 1, N_st diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index 5a9f5e69..30b0f273 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -1,3 +1,33 @@ + +subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) + use bitmasks + implicit none + BEGIN_DOC + ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. + ! + ! Computes $s_0 = \langle l_0 | S^2 | r_0\rangle$. + ! + ! Assumes that the determinants are in psi_det + ! + ! istart, iend, ishift, istep are used in ZMQ parallelization. + END_DOC + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate + double precision, allocatable :: s_0(:,:), v_0(:,:) + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) + do_right = .True. 
+ call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + do istate = 1, N_st + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + enddo +end + subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) use bitmasks implicit none diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 11a14b41..71dad8d6 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -35,6 +35,7 @@ end &BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)] &BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)] +&BEGIN_PROVIDER [double precision, s2_eigvec_tc_bi_orth, (N_states)] &BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ] BEGIN_DOC @@ -46,64 +47,153 @@ end logical :: converged, dagger integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) + double precision, allocatable :: s2_values_tmp(:), H_prime(:,:), expect_e(:) + double precision, parameter :: alpha = 0.1d0 + integer :: i_good_state,i_other_state, i_state + integer, allocatable :: index_good_state_array(:) + logical, allocatable :: good_state_array(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + integer, allocatable :: iorder(:) PROVIDE N_det N_int if(n_det.le.N_det_max_full)then - allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det)) - call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) + allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) 
+ if(s2_eig)then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo + endif + call non_hrmt_real_diag(N_det,H_prime,& leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& n_real_tc_bi_orth_eigval_right,eigval_right_tmp) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) - integer, allocatable :: iorder(:) - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det - iorder(i) = i - coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det - iorder(i) = i - coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - - if(igood_r.ne.igood_l.and.igood_r.ne.1)then - print *,'' - print *,'Warning, the left and right eigenvectors are "not the same" ' - print *,'Warning, the ground state is not dominated by HF...' - print *,'State with largest RIGHT coefficient of HF ',igood_r - print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) - print *,'State with largest LEFT coefficient of HF ',igood_l - print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then - print *,'Following the states with the largest coef on HF' - print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r - eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! 
print*,reigvec_tc_bi_orth(j,1) - enddo - i= igood_l - eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) - do j = 1, N_det - leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) - enddo - else - do i = 1, N_states - eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) - eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) - do j = 1, N_det - reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) - leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) +! do i = 1, N_det +! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) +! enddo + call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) + i_state = 0 + good_state_array = .False. + if(s2_eig)then + if (only_expected_s2) then + do j=1,N_det + ! Select at least n_states states with S^2 values closed to "expected_s2" +! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) + if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif enddo - enddo + else + do j=1,N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + if(i_state .ne.0)then + ! 
Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i=1,N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i=1,N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' + print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j=1,min(N_states_diag,N_det) + do i=1,N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 + + else ! 
s2_eig + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det + iorder(i) = i + coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det + iorder(i) = i + coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + + if(igood_r.ne.igood_l.and.igood_r.ne.1)then + print *,'' + print *,'Warning, the left and right eigenvectors are "not the same" ' + print *,'Warning, the ground state is not dominated by HF...' + print *,'State with largest RIGHT coefficient of HF ',igood_r + print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) + print *,'State with largest LEFT coefficient of HF ',igood_l + print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) + endif + if(state_following_tc)then + print *,'Following the states with the largest coef on HF' + print *,'igood_r,igood_l',igood_r,igood_l + i= igood_r + eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) +! 
print*,reigvec_tc_bi_orth(j,1) + enddo + i= igood_l + eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) + do j = 1, N_det + leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) + enddo + else + do i = 1, N_states + eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) + eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) + do j = 1, N_det + reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i) + leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) + enddo + enddo + endif endif else double precision, allocatable :: H_jj(:),vec_tmp(:,:) @@ -111,6 +201,8 @@ end external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt + external H_tc_s2_dagger_u_0_opt + external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) @@ -125,7 +217,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) +! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -140,7 +233,8 @@ end vec_tmp(istate,istate) = 1.d0 enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) - call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) +! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo @@ -154,6 +248,7 @@ end norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) enddo print*,'norm l/r = ',norm_ground_left_right_bi_orth + print*,' = ',s2_eigvec_tc_bi_orth(1) END_PROVIDER diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index a5241fe3..4229fef1 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -84,12 +84,12 @@ end subroutine routine_test_s2_davidson implicit none - double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) + double precision, allocatable :: H_jj(:),vec_tmp(:,:), energies(:) , s2(:) integer :: i,istate logical :: converged external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt - allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag)) + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo @@ -105,8 +105,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -122,6 +121,8 @@ subroutine routine_test_s2_davidson 
accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 @@ -137,8 +138,7 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) - print*,'energies = ',energies + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. @@ -151,6 +151,8 @@ subroutine routine_test_s2_davidson accu_e_0 += v_0_new(i,1) * vec_tmp(i,1) accu_s_0 += s_0_new(i,1) * vec_tmp(i,1) enddo + print*,'energies = ',energies + print*,'s2 = ',s2 print*,'accu_e_0',accu_e_0 print*,'accu_s_0',accu_s_0 From 6be57e3c01f1ba71d22495fcfaed52448792163e Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 15/16] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! 
y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING THE ROUTINE MULTIPLE TIMES + ! + ! SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! 
--- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True. 
From 5cfff229a17859c97aeb15363bad58946852cdf7 Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 12 Apr 2023 17:10:06 +0200 Subject: [PATCH 16/16] fixed bug in S2 for TC davidson --- .../diagonalization_hs2_dressed.irp.f | 4 ++-- src/tc_bi_ortho/dav_h_tc_s2.irp.f | 16 +++++++++++---- src/tc_bi_ortho/tc_h_eigvectors.irp.f | 20 +++++++++++++++++-- src/tc_bi_ortho/test_s2_tc.irp.f | 7 +++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 8117f320..ac71d1d4 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -465,8 +465,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_ integer :: lwork, info double precision, allocatable :: work(:) -! y = h - y = h_p + y = h +! y = h_p lwork = -1 allocate(work(1)) call dsygv(1,'V','U',shift2,y,size(y,1), & diff --git a/src/tc_bi_ortho/dav_h_tc_s2.irp.f b/src/tc_bi_ortho/dav_h_tc_s2.irp.f index c0ea054a..ea9cacff 100644 --- a/src/tc_bi_ortho/dav_h_tc_s2.irp.f +++ b/src/tc_bi_ortho/dav_h_tc_s2.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, converged, hcalc) +subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N_st_diag_in, n_it_max_dav, converged, hcalc) use mmap_module @@ -21,11 +21,17 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N ! Initial guess vectors are not necessarily orthonormal ! ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output) + ! + ! !!! WARNING !!! IT SEEMS THAT IF THE NUMBER OF MACRO ITERATIONS EXCEEDS n_it_max_dav, + ! + ! THE RECONTRACTION IS WRONG. YOU SHOULD CONSIDER CALLING THE ROUTINE MULTIPLE TIMES + ! + ! 
SEE FOR INSTANCE IN tc_bi_ortho/tc_h_eigvectors.irp.f END_DOC implicit none - integer, intent(in) :: sze, N_st, N_st_diag_in + integer, intent(in) :: sze, N_st, N_st_diag_in, n_it_max_dav double precision, intent(in) :: H_jj(sze) logical, intent(inout) :: converged double precision, intent(inout) :: u_in(sze,N_st_diag_in) @@ -246,7 +252,9 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N itertot = 0 - do while (.not.converged) +! do while (.not.converged.or.itertot.le.n_it_max_dav) + integer :: iiii + do iiii = 1, n_it_max_dav itertot = itertot + 1 if(itertot == 8) then @@ -522,7 +530,7 @@ subroutine davidson_hs2_nonsym_b1space(u_in, H_jj, s2_out,energies, sze, N_st, N enddo endif enddo - + if(converged)exit enddo ! loop over while ! --- diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 71dad8d6..91775cf1 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -208,7 +208,11 @@ end call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo !!!! Preparing the left-eigenvector + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the left-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' vec_tmp = 0.d0 do istate = 1, N_states vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) @@ -218,12 +222,21 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) ! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + converged = .False. + do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo + print*,'---------------------------------' + print*,'---------------------------------' print*,'Computing the right-eigenvector ' + print*,'---------------------------------' + print*,'---------------------------------' !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states @@ -234,7 +247,10 @@ end enddo ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) ! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + converged = .False. 
+ do while (.not.converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + enddo do istate = 1, N_states reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4229fef1..4debe2e2 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -105,7 +105,9 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_dagger_u_0_opt) + integer :: n_it_max + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) double precision, allocatable :: v_0_new(:,:),s_0_new(:,:) integer :: sze,N_st logical :: do_right @@ -138,7 +140,8 @@ subroutine routine_test_s2_davidson do istate = 1, N_states leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, converged, H_tc_s2_u_0_opt) + n_it_max = 1 + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2, energies, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) sze = N_det N_st = 1 do_right = .True.