From b9c18338960064f01bdd3e7bf6427ab3510575c9 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Fri, 2 Jun 2023 00:33:37 +0200 Subject: [PATCH] Optimized three_e_5_idx_exch12_bi_ort --- src/bi_ort_ints/bi_ort_ints.irp.f | 12 +- src/bi_ort_ints/three_body_ijmkl.irp.f | 162 ++++++++++++++++++++----- src/utils/integration.irp.f | 24 ++++ 3 files changed, 166 insertions(+), 32 deletions(-) diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f index ca50dd56..63b2aa8c 100644 --- a/src/bi_ort_ints/bi_ort_ints.irp.f +++ b/src/bi_ort_ints/bi_ort_ints.irp.f @@ -16,23 +16,27 @@ subroutine test_3e double precision :: accu, contrib,new,ref i = 1 k = 1 + n = 0 accu = 0.d0 do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num do m = 1, mo_num - do n = 1, mo_num - call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) - call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) + new = three_e_5_idx_exch12_bi_ort(m,l,j,k,i) + ref = three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) +! do n = 1, mo_num +! call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new) +! call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref) contrib = dabs(new - ref) accu += contrib if(contrib .gt. 1.d-10)then print*,'pb !!' print*,i,k,j,l,m,n print*,ref,new,contrib + stop endif - enddo +! enddo enddo enddo enddo diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f index ae4c9bd5..af2cb353 100644 --- a/src/bi_ort_ints/three_body_ijmkl.irp.f +++ b/src/bi_ort_ints/three_body_ijmkl.irp.f @@ -24,7 +24,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral) - three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -45,7 +45,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -73,7 +73,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -82,7 +82,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral) - three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -94,7 +94,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -122,7 +122,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -131,7 +131,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num do m = 1, mo_num do l = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral) - three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num call wall_time(wall1) print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -171,7 +171,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -180,7 +180,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral) - three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -192,7 +192,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -220,7 +220,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP PRIVATE (i,j,k,m,l,integral) & !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort) !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) do i = 1, mo_num @@ -229,7 +229,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, do l = 1, mo_num do m = 1, mo_num call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral) - three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral + three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral enddo enddo enddo @@ -241,7 +241,57 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort_old, (mo_num, mo_num, mo_num, mo_num, mo_num)] + + BEGIN_DOC + ! + ! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs + ! + ! three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = ::: notice that i is the RIGHT MO and k is the LEFT MO + ! + ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign + ! + END_DOC + + implicit none + integer :: i, j, k, m, l + double precision :: integral, wall1, wall0 + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t + + three_e_5_idx_exch12_bi_ort_old = 0.d0 + print *, ' Providing the three_e_5_idx_exch12_bi_ort_old ...' + call wall_time(wall0) + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,m,l,integral) & + !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort_old) + !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + do m = 1, mo_num + call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) + three_e_5_idx_exch12_bi_ort_old(m,l,j,k,i) = -1.d0 * integral + enddo + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call wall_time(wall1) + print *, ' wall time for three_e_5_idx_exch12_bi_ort_old', wall1 - wall0 + +END_PROVIDER ! --- @@ -259,38 +309,94 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, implicit none integer :: i, j, k, m, l - double precision :: integral, wall1, wall0 + double precision :: wall1, wall0 + integer :: ipoint + double precision :: weight + double precision, allocatable :: grad_mli(:,:,:), m2grad_r(:,:,:,:), m2grad_l(:,:,:,:) + double precision, allocatable :: tmp_mat(:,:,:,:), orb_mat(:,:,:) + allocate(grad_mli(n_points_final_grid,mo_num,mo_num)) + allocate(m2grad_r(n_points_final_grid,3,mo_num,mo_num)) + allocate(m2grad_l(n_points_final_grid,3,mo_num,mo_num)) + allocate(tmp_mat(mo_num,mo_num,mo_num,mo_num)) + allocate(orb_mat(n_points_final_grid,mo_num,mo_num)) + + provide mos_r_in_r_array_transp mos_l_in_r_array_transp + PROVIDE mo_l_coef mo_r_coef int2_grad1_u12_bimo_t - three_e_5_idx_exch12_bi_ort = 0.d0 print *, ' Providing the three_e_5_idx_exch12_bi_ort ...' call wall_time(wall0) - provide mos_r_in_r_array_transp mos_l_in_r_array_transp + do m = 1, mo_num !$OMP PARALLEL & !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,m,l,integral) & - !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort) - !$OMP DO SCHEDULE (dynamic) COLLAPSE(2) + !$OMP PRIVATE (i,l,ipoint) & + !$OMP SHARED (m,mo_num,n_points_final_grid, & + !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & + !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & + !$OMP m2grad_r, m2grad_l, grad_mli, tmp_mat, orb_mat) + !$OMP DO COLLAPSE(2) + do i=1,mo_num + do l=1,mo_num + do ipoint=1, n_points_final_grid + grad_mli(ipoint,l,i) = final_weight_at_r_vector(ipoint) * ( & + int2_grad1_u12_bimo_t(ipoint,1,m,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + & + int2_grad1_u12_bimo_t(ipoint,2,m,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + & + int2_grad1_u12_bimo_t(ipoint,3,m,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) ) + m2grad_l(ipoint,1,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,2,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * final_weight_at_r_vector(ipoint) + m2grad_l(ipoint,3,l,i) = mos_l_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * final_weight_at_r_vector(ipoint) + m2grad_r(ipoint,1,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,1,l,i) + m2grad_r(ipoint,2,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,2,l,i) + m2grad_r(ipoint,3,l,i) = mos_r_in_r_array_transp(ipoint,m) * int2_grad1_u12_bimo_t(ipoint,3,l,i) + orb_mat(ipoint,l,i) = mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, 1.d0, & + orb_mat, n_points_final_grid, & + grad_mli, n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) do i = 1, mo_num do k = 1, mo_num do j = 1, mo_num do l = 1, mo_num - do m = 1, mo_num - call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral) - three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral - enddo + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = - tmp_mat(l,i,k,j) - tmp_mat(k,j,l,i) enddo enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END PARALLEL DO + + call dgemm('T','N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0, & + m2grad_l, 3*n_points_final_grid, & + m2grad_r, 3*n_points_final_grid, 0.d0, & + tmp_mat, mo_num*mo_num) + + !$OMP PARALLEL DO PRIVATE(i,j,k,l) + do i = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do l = 1, mo_num + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = & + three_e_5_idx_exch12_bi_ort(m,l,j,k,i) - tmp_mat(l,i,k,j) + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + + enddo call wall_time(wall1) print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 21179dac..b548b18a 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -484,6 +484,30 @@ subroutine multiply_poly(b,nb,c,nc,d,nd) integer :: ib, ic, id, k if(ior(nc,nb) < 0) return !False if nc>=0 and nb>=0 + select case (nb) + case (0) + call multiply_poly_b0(b,c,nc,d,nd) + return + case (1) + call multiply_poly_b1(b,c,nc,d,nd) + return + case (2) + call multiply_poly_b2(b,c,nc,d,nd) + return + end select + + select case (nc) + case (0) + call multiply_poly_c0(b,nb,c,d,nd) + return + case (1) + call multiply_poly_c1(b,nb,c,d,nd) + return + case (2) + call multiply_poly_c2(b,nb,c,d,nd) + return + end select + do ib=0,nb do ic = 0,nc d(ib+ic) = d(ib+ic) + c(ic) * b(ib)