mirror of
https://github.com/QuantumPackage/qp2.git
synced 2025-01-05 19:08:47 +01:00
OPTIMZATIONS IN 4-ind integ
This commit is contained in:
parent
7076fcd202
commit
8739c26509
@ -18,10 +18,11 @@ program bi_ort_ints
|
|||||||
! call test_5idx
|
! call test_5idx
|
||||||
! call test_5idx2
|
! call test_5idx2
|
||||||
call test_4idx()
|
call test_4idx()
|
||||||
call test_4idx_n4()
|
!call test_4idx_n4()
|
||||||
!call test_4idx2()
|
!call test_4idx2()
|
||||||
!call test_5idx2
|
!call test_5idx2
|
||||||
!call test_5idx
|
!call test_5idx
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
subroutine test_5idx2
|
subroutine test_5idx2
|
||||||
@ -340,7 +341,7 @@ subroutine test_4idx()
|
|||||||
|
|
||||||
implicit none
|
implicit none
|
||||||
integer :: i, j, k, l
|
integer :: i, j, k, l
|
||||||
double precision :: accu, contrib, new, ref, thr
|
double precision :: accu, contrib, new, ref, thr, norm
|
||||||
|
|
||||||
thr = 1d-10
|
thr = 1d-10
|
||||||
|
|
||||||
@ -348,6 +349,7 @@ subroutine test_4idx()
|
|||||||
PROVIDE three_e_4_idx_direct_bi_ort
|
PROVIDE three_e_4_idx_direct_bi_ort
|
||||||
|
|
||||||
accu = 0.d0
|
accu = 0.d0
|
||||||
|
norm = 0.d0
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do k = 1, mo_num
|
do k = 1, mo_num
|
||||||
@ -356,7 +358,6 @@ subroutine test_4idx()
|
|||||||
new = three_e_4_idx_direct_bi_ort (l,k,j,i)
|
new = three_e_4_idx_direct_bi_ort (l,k,j,i)
|
||||||
ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i)
|
ref = three_e_4_idx_direct_bi_ort_old(l,k,j,i)
|
||||||
contrib = dabs(new - ref)
|
contrib = dabs(new - ref)
|
||||||
accu += contrib
|
|
||||||
if(contrib .gt. thr) then
|
if(contrib .gt. thr) then
|
||||||
print*, ' problem in three_e_4_idx_direct_bi_ort'
|
print*, ' problem in three_e_4_idx_direct_bi_ort'
|
||||||
print*, l, k, j, i
|
print*, l, k, j, i
|
||||||
@ -364,11 +365,14 @@ subroutine test_4idx()
|
|||||||
stop
|
stop
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
accu += contrib
|
||||||
|
norm += dabs(ref)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
print*, ' accu on three_e_4_idx_direct_bi_ort = ', accu / dble(mo_num)**4
|
|
||||||
|
print*, ' accu on three_e_4_idx_direct_bi_ort (%) = ', 100.d0 * accu / norm
|
||||||
|
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
@ -376,6 +380,7 @@ subroutine test_4idx()
|
|||||||
PROVIDE three_e_4_idx_exch13_bi_ort
|
PROVIDE three_e_4_idx_exch13_bi_ort
|
||||||
|
|
||||||
accu = 0.d0
|
accu = 0.d0
|
||||||
|
norm = 0.d0
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do k = 1, mo_num
|
do k = 1, mo_num
|
||||||
@ -384,7 +389,6 @@ subroutine test_4idx()
|
|||||||
new = three_e_4_idx_exch13_bi_ort (l,k,j,i)
|
new = three_e_4_idx_exch13_bi_ort (l,k,j,i)
|
||||||
ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i)
|
ref = three_e_4_idx_exch13_bi_ort_old(l,k,j,i)
|
||||||
contrib = dabs(new - ref)
|
contrib = dabs(new - ref)
|
||||||
accu += contrib
|
|
||||||
if(contrib .gt. thr) then
|
if(contrib .gt. thr) then
|
||||||
print*, ' problem in three_e_4_idx_exch13_bi_ort'
|
print*, ' problem in three_e_4_idx_exch13_bi_ort'
|
||||||
print*, l, k, j, i
|
print*, l, k, j, i
|
||||||
@ -392,11 +396,14 @@ subroutine test_4idx()
|
|||||||
stop
|
stop
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
accu += contrib
|
||||||
|
norm += dabs(ref)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
print*, ' accu on three_e_4_idx_exch13_bi_ort = ', accu / dble(mo_num)**4
|
|
||||||
|
print*, ' accu on three_e_4_idx_exch13_bi_ort (%) = ', 100.d0 * accu / norm
|
||||||
|
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
@ -404,6 +411,7 @@ subroutine test_4idx()
|
|||||||
PROVIDE three_e_4_idx_cycle_1_bi_ort
|
PROVIDE three_e_4_idx_cycle_1_bi_ort
|
||||||
|
|
||||||
accu = 0.d0
|
accu = 0.d0
|
||||||
|
norm = 0.d0
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do k = 1, mo_num
|
do k = 1, mo_num
|
||||||
@ -412,7 +420,6 @@ subroutine test_4idx()
|
|||||||
new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i)
|
new = three_e_4_idx_cycle_1_bi_ort (l,k,j,i)
|
||||||
ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i)
|
ref = three_e_4_idx_cycle_1_bi_ort_old(l,k,j,i)
|
||||||
contrib = dabs(new - ref)
|
contrib = dabs(new - ref)
|
||||||
accu += contrib
|
|
||||||
if(contrib .gt. thr) then
|
if(contrib .gt. thr) then
|
||||||
print*, ' problem in three_e_4_idx_cycle_1_bi_ort'
|
print*, ' problem in three_e_4_idx_cycle_1_bi_ort'
|
||||||
print*, l, k, j, i
|
print*, l, k, j, i
|
||||||
@ -420,11 +427,14 @@ subroutine test_4idx()
|
|||||||
stop
|
stop
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
accu += contrib
|
||||||
|
norm += dabs(ref)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
print*, ' accu on three_e_4_idx_cycle_1_bi_ort = ', accu / dble(mo_num)**4
|
|
||||||
|
print*, ' accu on three_e_4_idx_cycle_1_bi_ort (%) = ', 100.d0 * accu / norm
|
||||||
|
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
@ -432,6 +442,7 @@ subroutine test_4idx()
|
|||||||
PROVIDE three_e_4_idx_exch23_bi_ort
|
PROVIDE three_e_4_idx_exch23_bi_ort
|
||||||
|
|
||||||
accu = 0.d0
|
accu = 0.d0
|
||||||
|
norm = 0.d0
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do k = 1, mo_num
|
do k = 1, mo_num
|
||||||
@ -440,7 +451,6 @@ subroutine test_4idx()
|
|||||||
new = three_e_4_idx_exch23_bi_ort (l,k,j,i)
|
new = three_e_4_idx_exch23_bi_ort (l,k,j,i)
|
||||||
ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i)
|
ref = three_e_4_idx_exch23_bi_ort_old(l,k,j,i)
|
||||||
contrib = dabs(new - ref)
|
contrib = dabs(new - ref)
|
||||||
accu += contrib
|
|
||||||
if(contrib .gt. thr) then
|
if(contrib .gt. thr) then
|
||||||
print*, ' problem in three_e_4_idx_exch23_bi_ort'
|
print*, ' problem in three_e_4_idx_exch23_bi_ort'
|
||||||
print*, l, k, j, i
|
print*, l, k, j, i
|
||||||
@ -448,13 +458,18 @@ subroutine test_4idx()
|
|||||||
stop
|
stop
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
accu += contrib
|
||||||
|
norm += dabs(ref)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
print*, ' accu on three_e_4_idx_exch23_bi_ort = ', accu / dble(mo_num)**4
|
|
||||||
|
print*, ' accu on three_e_4_idx_exch23_bi_ort (%) = ', 100.d0 * accu / norm
|
||||||
|
|
||||||
! ---
|
! ---
|
||||||
|
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,22 +64,37 @@
|
|||||||
!$OMP END DO
|
!$OMP END DO
|
||||||
!$OMP END PARALLEL
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
! loops approach to break the O(N^4) scaling in memory
|
||||||
|
|
||||||
|
call set_multiple_levels_omp(.false.)
|
||||||
|
|
||||||
|
!$OMP PARALLEL &
|
||||||
|
!$OMP DEFAULT (NONE) &
|
||||||
|
!$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2, tmp_2d, tmp1, tmp2) &
|
||||||
|
!$OMP SHARED (mo_num, n_points_final_grid, i, k, &
|
||||||
|
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
|
||||||
|
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
|
||||||
|
!$OMP tmp_aux_1, tmp_aux_2, &
|
||||||
|
!$OMP three_e_4_idx_direct_bi_ort, three_e_4_idx_exch13_bi_ort, &
|
||||||
|
!$OMP three_e_4_idx_exch23_bi_ort, three_e_4_idx_cycle_1_bi_ort)
|
||||||
|
|
||||||
allocate(tmp_2d(mo_num,mo_num))
|
allocate(tmp_2d(mo_num,mo_num))
|
||||||
allocate(tmp1(n_points_final_grid,4,mo_num))
|
allocate(tmp1(n_points_final_grid,4,mo_num))
|
||||||
allocate(tmp2(n_points_final_grid,4,mo_num))
|
allocate(tmp2(n_points_final_grid,4,mo_num))
|
||||||
|
|
||||||
! loops approach to break the O(N^4) scaling in memory
|
!$OMP DO
|
||||||
do k = 1, mo_num
|
do k = 1, mo_num
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
! ---
|
||||||
!$OMP DEFAULT (NONE) &
|
|
||||||
!$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
|
|
||||||
!$OMP SHARED (mo_num, n_points_final_grid, i, k, &
|
|
||||||
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
|
|
||||||
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
|
|
||||||
!$OMP tmp_aux_2, tmp1)
|
|
||||||
!$OMP DO
|
|
||||||
do n = 1, mo_num
|
do n = 1, mo_num
|
||||||
do ipoint = 1, n_points_final_grid
|
do ipoint = 1, n_points_final_grid
|
||||||
|
|
||||||
@ -95,31 +110,19 @@
|
|||||||
|
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
|
||||||
!$OMP END PARALLEL
|
|
||||||
|
|
||||||
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
||||||
, tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid &
|
, tmp_aux_1(1,1,1), 4*n_points_final_grid, tmp1(1,1,1), 4*n_points_final_grid &
|
||||||
, 0.d0, tmp_2d(1,1), mo_num)
|
, 0.d0, tmp_2d(1,1), mo_num)
|
||||||
|
|
||||||
!$OMP PARALLEL DO PRIVATE(j,m)
|
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do m = 1, mo_num
|
do m = 1, mo_num
|
||||||
three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j)
|
three_e_4_idx_direct_bi_ort(m,j,k,i) = -tmp_2d(m,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
|
||||||
!$OMP DEFAULT (NONE) &
|
|
||||||
!$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
|
|
||||||
!$OMP SHARED (mo_num, n_points_final_grid, i, k, &
|
|
||||||
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
|
|
||||||
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
|
|
||||||
!$OMP tmp1, tmp2)
|
|
||||||
!$OMP DO
|
|
||||||
do n = 1, mo_num
|
do n = 1, mo_num
|
||||||
do ipoint = 1, n_points_final_grid
|
do ipoint = 1, n_points_final_grid
|
||||||
|
|
||||||
@ -139,45 +142,39 @@
|
|||||||
tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n)
|
tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,i) * mos_r_in_r_array_transp(ipoint,n)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
|
||||||
!$OMP END PARALLEL
|
! ---
|
||||||
|
|
||||||
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
||||||
, tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid &
|
, tmp1(1,1,1), 4*n_points_final_grid, tmp_aux_1(1,1,1), 4*n_points_final_grid &
|
||||||
, 0.d0, tmp_2d(1,1), mo_num)
|
, 0.d0, tmp_2d(1,1), mo_num)
|
||||||
|
|
||||||
!$OMP PARALLEL DO PRIVATE(j,m)
|
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do m = 1, mo_num
|
do m = 1, mo_num
|
||||||
three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j)
|
three_e_4_idx_exch13_bi_ort(m,j,k,i) = -tmp_2d(m,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
|
||||||
|
! ---
|
||||||
|
|
||||||
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
||||||
, tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
|
, tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
|
||||||
, 0.d0, tmp_2d(1,1), mo_num)
|
, 0.d0, tmp_2d(1,1), mo_num)
|
||||||
|
|
||||||
!$OMP PARALLEL DO PRIVATE(j,m)
|
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
do m = 1, mo_num
|
do m = 1, mo_num
|
||||||
three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j)
|
three_e_4_idx_cycle_1_bi_ort(m,i,k,j) = -tmp_2d(m,j)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
|
||||||
|
! ---
|
||||||
|
|
||||||
enddo ! i
|
enddo ! i
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
do j = 1, mo_num
|
do j = 1, mo_num
|
||||||
|
|
||||||
!$OMP PARALLEL &
|
|
||||||
!$OMP DEFAULT (NONE) &
|
|
||||||
!$OMP PRIVATE (n, ipoint, tmp_loc_1, tmp_loc_2) &
|
|
||||||
!$OMP SHARED (mo_num, n_points_final_grid, j, k, &
|
|
||||||
!$OMP mos_l_in_r_array_transp, mos_r_in_r_array_transp, &
|
|
||||||
!$OMP int2_grad1_u12_bimo_t, final_weight_at_r_vector, &
|
|
||||||
!$OMP tmp1, tmp2)
|
|
||||||
!$OMP DO
|
|
||||||
do n = 1, mo_num
|
do n = 1, mo_num
|
||||||
do ipoint = 1, n_points_final_grid
|
do ipoint = 1, n_points_final_grid
|
||||||
|
|
||||||
@ -197,31 +194,33 @@
|
|||||||
tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n)
|
tmp2(ipoint,4,n) = final_weight_at_r_vector(ipoint) * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,n)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END DO
|
|
||||||
!$OMP END PARALLEL
|
|
||||||
|
|
||||||
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
call dgemm( 'T', 'N', mo_num, mo_num, 4*n_points_final_grid, 1.d0 &
|
||||||
, tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
|
, tmp1(1,1,1), 4*n_points_final_grid, tmp2(1,1,1), 4*n_points_final_grid &
|
||||||
, 0.d0, tmp_2d(1,1), mo_num)
|
, 0.d0, tmp_2d(1,1), mo_num)
|
||||||
|
|
||||||
!$OMP PARALLEL DO PRIVATE(i,m)
|
|
||||||
do i = 1, mo_num
|
do i = 1, mo_num
|
||||||
do m = 1, mo_num
|
do m = 1, mo_num
|
||||||
three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i)
|
three_e_4_idx_exch23_bi_ort(m,j,k,i) = -tmp_2d(m,i)
|
||||||
enddo
|
enddo
|
||||||
enddo
|
enddo
|
||||||
!$OMP END PARALLEL DO
|
|
||||||
|
|
||||||
enddo ! j
|
enddo ! j
|
||||||
|
|
||||||
|
! ---
|
||||||
|
|
||||||
enddo !k
|
enddo !k
|
||||||
|
!$OMP END DO
|
||||||
|
|
||||||
deallocate(tmp_2d)
|
deallocate(tmp_2d)
|
||||||
deallocate(tmp1)
|
deallocate(tmp1)
|
||||||
deallocate(tmp2)
|
deallocate(tmp2)
|
||||||
|
|
||||||
|
!$OMP END PARALLEL
|
||||||
|
|
||||||
deallocate(tmp_aux_1)
|
deallocate(tmp_aux_1)
|
||||||
deallocate(tmp_aux_2)
|
deallocate(tmp_aux_2)
|
||||||
|
|
||||||
|
|
||||||
call wall_time(wall1)
|
call wall_time(wall1)
|
||||||
print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0
|
print *, ' wall time for three_e_4_idx_bi_ort', wall1 - wall0
|
||||||
call print_memory_usage()
|
call print_memory_usage()
|
||||||
|
Loading…
Reference in New Issue
Block a user