10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-05 10:59:45 +01:00

4-idx tensors: DGEMM with tmp(N3) added

This commit is contained in:
Abdallah Ammar 2023-06-09 21:32:13 +02:00
parent 87f7ec0456
commit 22e1dcd1c4
3 changed files with 771 additions and 469 deletions

View File

@ -18,10 +18,11 @@ program bi_ort_ints
! call test_3e ! call test_3e
! call test_5idx ! call test_5idx
! call test_5idx2 ! call test_5idx2
!call test_4idx call test_4idx()
call test_4idx2() call test_4idx_n4()
call test_5idx2 !call test_4idx2()
call test_5idx !call test_5idx2
!call test_5idx
end end
subroutine test_5idx2 subroutine test_5idx2
@ -167,13 +168,138 @@ end
! --- ! ---
subroutine test_4idx_n4()
implicit none
integer :: i, j, k, l
double precision :: accu, contrib, new, ref, thr
thr = 1d-10
PROVIDE three_e_4_idx_direct_bi_ort_old
PROVIDE three_e_4_idx_direct_bi_ort_n4
accu = 0.d0
do i = 1, mo_num
do j = 1, mo_num
do k = 1, mo_num
do l = 1, mo_num
new = three_e_4_idx_direct_bi_ort_n4 (l,k,j,i)
ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i)
contrib = dabs(new - ref)
accu += contrib
if(contrib .gt. thr) then
print*, ' problem in three_e_4_idx_direct_bi_ort_n4'
print*, l, k, j, i
print*, ref, new, contrib
stop
endif
enddo
enddo
enddo
enddo
print*, ' accu on three_e_4_idx_direct_bi_ort_n4 = ', accu / dble(mo_num)**4
! ---
PROVIDE three_e_4_idx_exch13_bi_ort_old
PROVIDE three_e_4_idx_exch13_bi_ort_n4
accu = 0.d0
do i = 1, mo_num
do j = 1, mo_num
do k = 1, mo_num
do l = 1, mo_num
new = three_e_4_idx_exch13_bi_ort_n4 (l,k,j,i)
ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i)
contrib = dabs(new - ref)
accu += contrib
if(contrib .gt. thr) then
print*, ' problem in three_e_4_idx_exch13_bi_ort_n4'
print*, l, k, j, i
print*, ref, new, contrib
stop
endif
enddo
enddo
enddo
enddo
print*, ' accu on three_e_4_idx_exch13_bi_ort_n4 = ', accu / dble(mo_num)**4
! ---
PROVIDE three_e_4_idx_cycle_1_bi_ort_old
PROVIDE three_e_4_idx_cycle_1_bi_ort_n4
accu = 0.d0
do i = 1, mo_num
do j = 1, mo_num
do k = 1, mo_num
do l = 1, mo_num
new = three_e_4_idx_cycle_1_bi_ort_n4 (l,k,j,i)
ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i)
contrib = dabs(new - ref)
accu += contrib
if(contrib .gt. thr) then
print*, ' problem in three_e_4_idx_cycle_1_bi_ort_n4'
print*, l, k, j, i
print*, ref, new, contrib
stop
endif
enddo
enddo
enddo
enddo
print*, ' accu on three_e_4_idx_cycle_1_bi_ort_n4 = ', accu / dble(mo_num)**4
! ---
PROVIDE three_e_4_idx_exch23_bi_ort_old
PROVIDE three_e_4_idx_exch23_bi_ort_n4
accu = 0.d0
do i = 1, mo_num
do j = 1, mo_num
do k = 1, mo_num
do l = 1, mo_num
new = three_e_4_idx_exch23_bi_ort_n4 (l,k,j,i)
ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i)
contrib = dabs(new - ref)
accu += contrib
if(contrib .gt. thr) then
print*, ' problem in three_e_4_idx_exch23_bi_ort_n4'
print*, l, k, j, i
print*, ref, new, contrib
stop
endif
enddo
enddo
enddo
enddo
print*, ' accu on three_e_4_idx_exch23_bi_ort_n4 = ', accu / dble(mo_num)**4
! ---
return
end
! ---
subroutine test_4idx() subroutine test_4idx()
implicit none implicit none
integer :: i, j, k, l integer :: i, j, k, l
double precision :: accu, contrib, new, ref, thr double precision :: accu, contrib, new, ref, thr
thr = 1d-5 thr = 1d-10
PROVIDE three_e_4_idx_direct_bi_ort_old PROVIDE three_e_4_idx_direct_bi_ort_old
PROVIDE three_e_4_idx_direct_bi_ort PROVIDE three_e_4_idx_direct_bi_ort
@ -231,34 +357,6 @@ subroutine test_4idx()
! --- ! ---
! PROVIDE three_e_4_idx_exch12_bi_ort_old
! PROVIDE three_e_4_idx_exch12_bi_ort
!
! accu = 0.d0
! do i = 1, mo_num
! do j = 1, mo_num
! do k = 1, mo_num
! do l = 1, mo_num
!
! new = three_e_4_idx_exch12_bi_ort (l,k,j,i)
! ref = three_e_4_idx_exch12_bi_ort_old(l,k,j,i)
! contrib = dabs(new - ref)
! accu += contrib
! if(contrib .gt. thr) then
! print*, ' problem in three_e_4_idx_exch12_bi_ort'
! print*, l, k, j, i
! print*, ref, new, contrib
! stop
! endif
!
! enddo
! enddo
! enddo
! enddo
! print*, ' accu on three_e_4_idx_exch12_bi_ort = ', accu / dble(mo_num)**4
! ---
PROVIDE three_e_4_idx_cycle_1_bi_ort_old PROVIDE three_e_4_idx_cycle_1_bi_ort_old
PROVIDE three_e_4_idx_cycle_1_bi_ort PROVIDE three_e_4_idx_cycle_1_bi_ort
@ -287,34 +385,6 @@ subroutine test_4idx()
! --- ! ---
! PROVIDE three_e_4_idx_cycle_2_bi_ort_old
! PROVIDE three_e_4_idx_cycle_2_bi_ort
!
! accu = 0.d0
! do i = 1, mo_num
! do j = 1, mo_num
! do k = 1, mo_num
! do l = 1, mo_num
!
! new = three_e_4_idx_cycle_2_bi_ort (l,k,j,i)
! ref = three_e_4_idx_cycle_2_bi_ort_old(l,k,j,i)
! contrib = dabs(new - ref)
! accu += contrib
! if(contrib .gt. thr) then
! print*, ' problem in three_e_4_idx_cycle_2_bi_ort'
! print*, l, k, j, i
! print*, ref, new, contrib
! stop
! endif
!
! enddo
! enddo
! enddo
! enddo
! print*, ' accu on three_e_4_idx_cycle_2_bi_ort = ', accu / dble(mo_num)**4
! ---
PROVIDE three_e_4_idx_exch23_bi_ort_old PROVIDE three_e_4_idx_exch23_bi_ort_old
PROVIDE three_e_4_idx_exch23_bi_ort PROVIDE three_e_4_idx_exch23_bi_ort

View File

@ -3,9 +3,8 @@
BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)] BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
BEGIN_DOC BEGIN_DOC
! !
@ -13,28 +12,25 @@
! !
! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_direct_bi_ort (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch13_bi_ort (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO
! = three_e_4_idx_exch13_bi_ort (j,m,k,i)
! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO
! = three_e_4_idx_cycle_1_bi_ort(j,m,k,i)
! !
! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign ! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
! !
! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki ! three_e_4_idx_direct_bi_ort (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki
! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm ! three_e_4_idx_exch13_bi_ort (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm
! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki
! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm
! !
END_DOC END_DOC
implicit none implicit none
integer :: ipoint, i, j, k, l, m integer :: ipoint, i, j, k, m, n
double precision :: wall1, wall0 double precision :: wall1, wall0
double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:) double precision :: tmp_loc_1, tmp_loc_2
double precision, allocatable :: tmp_4d(:,:,:,:) double precision, allocatable :: tmp1(:,:,:), tmp2(:,:,:)
double precision, allocatable :: tmp4(:,:,:) double precision, allocatable :: tmp_2d(:,:)
double precision, allocatable :: tmp5(:,:) double precision, allocatable :: tmp_aux_1(:,:,:), tmp_aux_2(:,:)
double precision, allocatable :: tmp_3d(:,:,:)
print *, ' Providing the three_e_4_idx_bi_ort ...' print *, ' Providing the three_e_4_idx_bi_ort ...'
call wall_time(wall0) call wall_time(wall0)
@ -42,230 +38,74 @@
provide mos_r_in_r_array_transp mos_l_in_r_array_transp provide mos_r_in_r_array_transp mos_l_in_r_array_transp
allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num)) ! to reduce the number of operations
allocate(tmp_aux_1(n_points_final_grid,4,mo_num))
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num)) allocate(tmp_aux_2(n_points_final_grid,mo_num))
allocate(tmp2(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp3(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) & !$OMP PRIVATE (n, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, & !$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1, tmp2, tmp3) !$OMP tmp_aux_1, tmp_aux_2)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp3, 3*n_points_final_grid, tmp1, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_4d(m,i,j,k)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1, 3*n_points_final_grid, tmp2, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp2)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1, 3*n_points_final_grid, tmp3, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp3)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_direct_bi_ort(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp_4d)
allocate(tmp_3d(mo_num,mo_num,mo_num))
allocate(tmp5(n_points_final_grid,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP final_weight_at_r_vector, &
!$OMP tmp5)
!$OMP DO !$OMP DO
do i = 1, mo_num do n = 1, mo_num
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp_aux_1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * final_weight_at_r_vector(ipoint)
tmp_aux_1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * final_weight_at_r_vector(ipoint)
tmp_aux_1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * final_weight_at_r_vector(ipoint)
tmp_aux_1(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n) * final_weight_at_r_vector(ipoint)
tmp_aux_2(ipoint,n) = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,n)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
allocate(tmp_2d(mo_num,mo_num))
allocate(tmp1(n_points_final_grid,4,mo_num))
allocate(tmp2(n_points_final_grid,4,mo_num))
allocate(tmp4(n_points_final_grid,mo_num,mo_num)) ! loops approach to break the O(N^4) scaling in memory
do k = 1, mo_num
do m = 1, mo_num do i = 1, mo_num
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, k, ipoint) & !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
!$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP SHARED (mo_num, n_points_final_grid, i, k, &
!$OMP int2_grad1_u12_bimo_t, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP tmp4) !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP DO COLLAPSE(2) !$OMP tmp_aux_2, tmp1)
do i = 1, mo_num !$OMP DO
do k = 1, mo_num do n = 1, mo_num
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) & tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)
+ int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) & tmp_loc_2 = tmp_aux_2(ipoint,n)
+ int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i)
enddo tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,i) * tmp_loc_2
tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,i) * tmp_loc_2
tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,n) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,i) * tmp_loc_2
tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,n) * int2_grad1_u12_bimo_t(ipoint,1,k,i) &
+ int2_grad1_u12_bimo_t(ipoint,2,n,n) * int2_grad1_u12_bimo_t(ipoint,2,k,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,n,n) * int2_grad1_u12_bimo_t(ipoint,3,k,i)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 & call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
, tmp5, n_points_final_grid, tmp4, n_points_final_grid & , tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid &
, 0.d0, tmp_3d, mo_num) , 0.d0, tmp_2d(1,1), mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k) !$OMP PARALLEL DO PRIVATE(j,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num do j = 1, mo_num
three_e_4_idx_exch13_bi_ort(m,j,k,i) = three_e_4_idx_exch13_bi_ort(m,j,k,i) - tmp_3d(j,k,i) do m = 1, mo_num
enddo three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j)
enddo enddo
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
@ -274,92 +114,112 @@
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (j, k, ipoint) & !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
!$OMP SHARED (mo_num, n_points_final_grid, m, & !$OMP SHARED (mo_num, n_points_final_grid, i, k, &
!$OMP mos_l_in_r_array_transp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp4) !$OMP tmp1, tmp2)
!$OMP DO COLLAPSE(2) !$OMP DO
do k = 1, mo_num do n = 1, mo_num
do j = 1, mo_num
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) & tmp_loc_1 = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n)
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) & tmp_loc_2 = mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,i)
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) ) tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,k,n) * tmp_loc_2
enddo tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,k,n) * tmp_loc_2
tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,i) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,k,n) * tmp_loc_2
tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,i) * int2_grad1_u12_bimo_t(ipoint,1,k,n) &
+ int2_grad1_u12_bimo_t(ipoint,2,n,i) * int2_grad1_u12_bimo_t(ipoint,2,k,n) &
+ int2_grad1_u12_bimo_t(ipoint,3,n,i) * int2_grad1_u12_bimo_t(ipoint,3,k,n)
tmp2(ipoint,1,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,i,n)
tmp2(ipoint,2,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,i,n)
tmp2(ipoint,3,n) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,i,n)
tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 & call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
, tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid & , tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid &
, 0.d0, tmp_3d, mo_num*mo_num) , 0.d0, tmp_2d(1,1), mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k) !$OMP PARALLEL DO PRIVATE(j,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num do j = 1, mo_num
three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(m,j,k,i) - tmp_3d(j,k,i) do m = 1, mo_num
enddo three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j)
enddo enddo
enddo enddo
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
, tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
, 0.d0, tmp_2d(1,1), mo_num)
!$OMP PARALLEL DO PRIVATE(j,m)
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j)
enddo enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp5) enddo ! i
deallocate(tmp_3d)
do j = 1, mo_num
do i = 1, mo_num
!$OMP PARALLEL & !$OMP PARALLEL &
!$OMP DEFAULT (NONE) & !$OMP DEFAULT (NONE) &
!$OMP PRIVATE (m, j, ipoint) & !$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
!$OMP SHARED (mo_num, n_points_final_grid, i, & !$OMP SHARED (mo_num, n_points_final_grid, j, k, &
!$OMP mos_r_in_r_array_transp, & !$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, & !$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp4) !$OMP tmp1, tmp2)
!$OMP DO COLLAPSE(2) !$OMP DO
do j = 1, mo_num do n = 1, mo_num
do m = 1, mo_num
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) & tmp_loc_1 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) * mos_r_in_r_array_transp(ipoint,n)
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) & tmp_loc_2 = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,j)
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) ) tmp1(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,1,j,n) * tmp_loc_2
enddo tmp1(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,2,j,n) * tmp_loc_2
tmp1(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,n,j) * tmp_loc_1 + int2_grad1_u12_bimo_t(ipoint,3,j,n) * tmp_loc_2
tmp1(ipoint,4,n) = int2_grad1_u12_bimo_t(ipoint,1,n,j) * int2_grad1_u12_bimo_t(ipoint,1,j,n) &
+ int2_grad1_u12_bimo_t(ipoint,2,n,j) * int2_grad1_u12_bimo_t(ipoint,2,j,n) &
+ int2_grad1_u12_bimo_t(ipoint,3,n,j) * int2_grad1_u12_bimo_t(ipoint,3,j,n)
tmp2(ipoint,1,n) = int2_grad1_u12_bimo_t(ipoint,1,k,n)
tmp2(ipoint,2,n) = int2_grad1_u12_bimo_t(ipoint,2,k,n)
tmp2(ipoint,3,n) = int2_grad1_u12_bimo_t(ipoint,3,k,n)
tmp2(ipoint,4,n) = mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n)
enddo enddo
enddo enddo
!$OMP END DO !$OMP END DO
!$OMP END PARALLEL !$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 & call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
, tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid & , tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
, 1.d0, three_e_4_idx_cycle_1_bi_ort(1,1,1,i), mo_num*mo_num) , 0.d0, tmp_2d(1,1), mo_num)
!$OMP PARALLEL DO PRIVATE(i,m)
do i = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i)
enddo enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp4) enddo ! j
enddo !k
deallocate(tmp_2d)
! !$OMP PARALLEL DO PRIVATE(i,j,k,m) deallocate(tmp1)
! do i = 1, mo_num deallocate(tmp2)
! do k = 1, mo_num deallocate(tmp_aux_1)
! do j = 1, mo_num deallocate(tmp_aux_2)
! do m = 1, mo_num
! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort (j,m,k,i)
! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort(j,m,k,i)
! enddo
! enddo
! enddo
! enddo
! !$OMP END PARALLEL DO
call wall_time(wall1) call wall_time(wall1)
@ -370,115 +230,3 @@ END_PROVIDER
! --- ! ---
BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
BEGIN_DOC
!
! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
!
! three_e_4_idx_exch23_bi_ort (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO
!
! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
!
! three_e_4_idx_exch23_bi_ort (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki
!
END_DOC
implicit none
integer :: i, j, k, l, m, ipoint
double precision :: wall1, wall0
double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:)
double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:)
print *, ' Providing the three_e_4_idx_exch23_bi_ort ...'
call wall_time(wall0)
provide mos_r_in_r_array_transp mos_l_in_r_array_transp
allocate(tmp5(n_points_final_grid,mo_num,mo_num))
allocate(tmp6(n_points_final_grid,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp5, tmp6)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) &
+ int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) &
+ int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l)
tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 &
, tmp5, n_points_final_grid, tmp6, n_points_final_grid &
, 0.d0, three_e_4_idx_exch23_bi_ort, mo_num*mo_num)
deallocate(tmp5)
deallocate(tmp6)
allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num))
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1, 3*n_points_final_grid, int2_grad1_u12_bimo_t, 3*n_points_final_grid &
, 0.d0, tmp_4d, mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch23_bi_ort(m,j,k,i) = three_e_4_idx_exch23_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp_4d)
call wall_time(wall1)
print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0
call print_memory_usage()
END_PROVIDER
! ---

View File

@ -0,0 +1,484 @@
! ---
BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)]
&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)]
&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort_n4, (mo_num, mo_num, mo_num, mo_num)]
!&BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort , (mo_num, mo_num, mo_num, mo_num)]
!&BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
BEGIN_DOC
!
! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
!
! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) = < m j k | -L | m j i > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) = < m j k | -L | i j m > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_exch12_bi_ort (m,j,k,i) = < m j k | -L | m i j > ::: notice that i is the RIGHT MO and k is the LEFT MO
! = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i)
! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = < m j k | -L | j i m > ::: notice that i is the RIGHT MO and k is the LEFT MO
! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = < m j k | -L | i m j > ::: notice that i is the RIGHT MO and k is the LEFT MO
! = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i)
!
! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign
!
! three_e_4_idx_direct_bi_ort_n4 (m,j,k,i) : Lk Ri Imm Ijj + Lj Rj Imm Iki + Lm Rm Ijj Iki
! three_e_4_idx_exch13_bi_ort_n4 (m,j,k,i) : Lk Rm Imi Ijj + Lj Rj Imi Ikm + Lm Ri Ijj Ikm
! three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) : Lk Rm Imj Iji + Lj Ri Imj Ikm + Lm Rj Iji Ikm
!
END_DOC
implicit none
integer :: ipoint, i, j, k, l, m
double precision :: wall1, wall0
double precision, allocatable :: tmp1(:,:,:,:), tmp2(:,:,:,:), tmp3(:,:,:,:)
double precision, allocatable :: tmp_4d(:,:,:,:)
double precision, allocatable :: tmp4(:,:,:)
double precision, allocatable :: tmp5(:,:)
double precision, allocatable :: tmp_3d(:,:,:)
print *, ' Providing the O(N^4) three_e_4_idx_bi_ort ...'
call wall_time(wall0)
provide mos_r_in_r_array_transp mos_l_in_r_array_transp
allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num))
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp2(n_points_final_grid,3,mo_num,mo_num))
allocate(tmp3(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1, tmp2, tmp3)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * final_weight_at_r_vector(ipoint)
tmp2(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp2(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp2(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_r_in_r_array_transp(ipoint,i)
tmp3(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_r_in_r_array_transp(ipoint,l)
tmp3(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_r_in_r_array_transp(ipoint,l)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp3(1,1,1,1), 3*n_points_final_grid, tmp1(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = -tmp_4d(m,i,j,k)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = int2_grad1_u12_bimo_t(ipoint,1,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,2,l,i) = int2_grad1_u12_bimo_t(ipoint,2,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
tmp1(ipoint,3,l,i) = int2_grad1_u12_bimo_t(ipoint,3,i,l) * mos_l_in_r_array_transp(ipoint,l) * final_weight_at_r_vector(ipoint)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1(1,1,1,1), 3*n_points_final_grid, tmp2(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
deallocate(tmp2)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1(1,1,1,1), 3*n_points_final_grid, tmp3(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
deallocate(tmp3)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = -tmp_4d(m,k,j,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,l) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_direct_bi_ort_n4(m,j,k,i) = three_e_4_idx_direct_bi_ort(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp_4d)
allocate(tmp_3d(mo_num,mo_num,mo_num))
allocate(tmp5(n_points_final_grid,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP final_weight_at_r_vector, &
!$OMP tmp5)
!$OMP DO
do i = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
allocate(tmp4(n_points_final_grid,mo_num,mo_num))
do m = 1, mo_num
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, k, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, m, &
!$OMP int2_grad1_u12_bimo_t, &
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do k = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp4(ipoint,k,i) = int2_grad1_u12_bimo_t(ipoint,1,k,m) * int2_grad1_u12_bimo_t(ipoint,1,m,i) &
+ int2_grad1_u12_bimo_t(ipoint,2,k,m) * int2_grad1_u12_bimo_t(ipoint,2,m,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,k,m) * int2_grad1_u12_bimo_t(ipoint,3,m,i)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num, mo_num*mo_num, n_points_final_grid, 1.d0 &
, tmp5(1,1), n_points_final_grid, tmp4(1,1,1), n_points_final_grid &
, 0.d0, tmp_3d(1,1,1), mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i)
enddo
enddo
enddo
!$OMP END PARALLEL DO
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (j, k, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, m, &
!$OMP mos_l_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do k = 1, mo_num
do j = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp4(ipoint,j,k) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,j) &
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,k,m) &
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,k,m) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,k,m) )
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, 1.d0 &
, tmp4, n_points_final_grid, mos_r_in_r_array_transp, n_points_final_grid &
, 0.d0, tmp_3d, mo_num*mo_num)
!$OMP PARALLEL DO PRIVATE(i,j,k)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(m,j,k,i) - tmp_3d(j,k,i)
enddo
enddo
enddo
!$OMP END PARALLEL DO
enddo
deallocate(tmp5)
deallocate(tmp_3d)
do i = 1, mo_num
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (m, j, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, i, &
!$OMP mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp4)
!$OMP DO COLLAPSE(2)
do j = 1, mo_num
do m = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp4(ipoint,m,j) = final_weight_at_r_vector(ipoint) * mos_r_in_r_array_transp(ipoint,m) &
* ( int2_grad1_u12_bimo_t(ipoint,1,m,j) * int2_grad1_u12_bimo_t(ipoint,1,j,i) &
+ int2_grad1_u12_bimo_t(ipoint,2,m,j) * int2_grad1_u12_bimo_t(ipoint,2,j,i) &
+ int2_grad1_u12_bimo_t(ipoint,3,m,j) * int2_grad1_u12_bimo_t(ipoint,3,j,i) )
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num, n_points_final_grid, -1.d0 &
, tmp4, n_points_final_grid, mos_l_in_r_array_transp, n_points_final_grid &
, 1.d0, three_e_4_idx_cycle_1_bi_ort_n4(1,1,1,i), mo_num*mo_num)
enddo
deallocate(tmp4)
! !$OMP PARALLEL DO PRIVATE(i,j,k,m)
! do i = 1, mo_num
! do k = 1, mo_num
! do j = 1, mo_num
! do m = 1, mo_num
! three_e_4_idx_exch12_bi_ort (m,j,k,i) = three_e_4_idx_exch13_bi_ort_n4 (j,m,k,i)
! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = three_e_4_idx_cycle_1_bi_ort_n4(j,m,k,i)
! enddo
! enddo
! enddo
! enddo
! !$OMP END PARALLEL DO
call wall_time(wall1)
print *, ' wall time for O(N^4) three_e_4_idx_bi_ort', wall1 - wall0
call print_memory_usage()
END_PROVIDER
! ---
BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort_n4 , (mo_num, mo_num, mo_num, mo_num)]
BEGIN_DOC
!
! matrix element of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
!
! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) = < m j k | -L | j m i > ::: notice that i is the RIGHT MO and k is the LEFT MO
!
! notice the -1 sign: in this way three_e_4_idx_direct_bi_ort_n4 can be directly used to compute Slater rules with a + sign
!
! three_e_4_idx_exch23_bi_ort_n4 (m,j,k,i) : Lk Ri Imj Ijm + Lj Rm Imj Iki + Lm Rj Ijm Iki
!
END_DOC
implicit none
integer :: i, j, k, l, m, ipoint
double precision :: wall1, wall0
double precision, allocatable :: tmp1(:,:,:,:), tmp_4d(:,:,:,:)
double precision, allocatable :: tmp5(:,:,:), tmp6(:,:,:)
print *, ' Providing the O(N^4) three_e_4_idx_exch23_bi_ort_n4 ...'
call wall_time(wall0)
provide mos_r_in_r_array_transp mos_l_in_r_array_transp
allocate(tmp5(n_points_final_grid,mo_num,mo_num))
allocate(tmp6(n_points_final_grid,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp5, tmp6)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp5(ipoint,l,i) = int2_grad1_u12_bimo_t(ipoint,1,l,i) * int2_grad1_u12_bimo_t(ipoint,1,i,l) &
+ int2_grad1_u12_bimo_t(ipoint,2,l,i) * int2_grad1_u12_bimo_t(ipoint,2,i,l) &
+ int2_grad1_u12_bimo_t(ipoint,3,l,i) * int2_grad1_u12_bimo_t(ipoint,3,i,l)
tmp6(ipoint,l,i) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,i)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, n_points_final_grid, -1.d0 &
, tmp5(1,1,1), n_points_final_grid, tmp6(1,1,1), n_points_final_grid &
, 0.d0, three_e_4_idx_exch23_bi_ort_n4(1,1,1,1), mo_num*mo_num)
deallocate(tmp5)
deallocate(tmp6)
allocate(tmp_4d(mo_num,mo_num,mo_num,mo_num))
allocate(tmp1(n_points_final_grid,3,mo_num,mo_num))
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (i, l, ipoint) &
!$OMP SHARED (mo_num, n_points_final_grid, &
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
!$OMP tmp1)
!$OMP DO COLLAPSE(2)
do i = 1, mo_num
do l = 1, mo_num
do ipoint = 1, n_points_final_grid
tmp1(ipoint,1,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,1,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
tmp1(ipoint,2,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,2,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
tmp1(ipoint,3,l,i) = final_weight_at_r_vector(ipoint) * int2_grad1_u12_bimo_t(ipoint,3,l,i) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,l)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm( 'T', 'N', mo_num*mo_num, mo_num*mo_num, 3*n_points_final_grid, 1.d0 &
, tmp1(1,1,1,1), 3*n_points_final_grid, int2_grad1_u12_bimo_t(1,1,1,1), 3*n_points_final_grid &
, 0.d0, tmp_4d(1,1,1,1), mo_num*mo_num)
deallocate(tmp1)
!$OMP PARALLEL DO PRIVATE(i,j,k,m)
do i = 1, mo_num
do k = 1, mo_num
do j = 1, mo_num
do m = 1, mo_num
three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) = three_e_4_idx_exch23_bi_ort_n4(m,j,k,i) - tmp_4d(m,j,k,i) - tmp_4d(j,m,k,i)
enddo
enddo
enddo
enddo
!$OMP END PARALLEL DO
deallocate(tmp_4d)
call wall_time(wall1)
print *, ' wall time for O(N^4) three_e_4_idx_exch23_bi_ort_n4', wall1 - wall0
call print_memory_usage()
END_PROVIDER
! ---