mirror of
https://github.com/QuantumPackage/qp2.git
synced 2024-11-19 12:32:30 +01:00
add normal-ordering with CuTC
This commit is contained in:
parent
4e9143133a
commit
bf15b68b0b
@ -9,13 +9,13 @@ module cutc_module
|
||||
|
||||
! ---
|
||||
|
||||
subroutine cutc_int_c(nxBlocks, nyBlocks, nzBlocks, &
|
||||
blockxSize, blockySize, blockzSize, &
|
||||
n_grid1, n_grid2, n_ao, n_nuc, size_bh, &
|
||||
r1, wr1, r2, wr2, rn, &
|
||||
aos_data1, aos_data2, &
|
||||
c_bh, m_bh, n_bh, o_bh, &
|
||||
int2_grad1_u12_ao, int_2e_ao) bind(C, name = "cutc_int_c")
|
||||
subroutine cutc_int(nxBlocks, nyBlocks, nzBlocks, &
|
||||
blockxSize, blockySize, blockzSize, &
|
||||
n_grid1, n_grid2, n_ao, n_nuc, size_bh, &
|
||||
r1, wr1, r2, wr2, rn, &
|
||||
aos_data1, aos_data2, &
|
||||
c_bh, m_bh, n_bh, o_bh, &
|
||||
int2_grad1_u12_ao, int_2e_ao) bind(C, name = "cutc_int")
|
||||
|
||||
import c_int, c_double, c_ptr
|
||||
integer(c_int), intent(in), value :: nxBlocks, blockxSize
|
||||
@ -37,7 +37,7 @@ module cutc_module
|
||||
real(c_double), intent(out) :: int2_grad1_u12_ao(n_ao,n_ao,n_grid1,3)
|
||||
real(c_double), intent(out) :: int_2e_ao(n_ao,n_ao,n_ao,n_ao)
|
||||
|
||||
end subroutine cutc_int_c
|
||||
end subroutine cutc_int
|
||||
|
||||
! ---
|
||||
|
||||
@ -170,6 +170,48 @@ module cutc_module
|
||||
|
||||
! ---
|
||||
|
||||
subroutine cutc_no_0e(n_grid1, n_mo, ne_a, ne_b, &
|
||||
wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, &
|
||||
no_0e) bind(C, name = "cutc_no_0e")
|
||||
|
||||
import c_int, c_double, c_ptr
|
||||
|
||||
integer(c_int), intent(in), value :: n_grid1
|
||||
integer(c_int), intent(in), value :: n_mo
|
||||
integer(c_int), intent(in), value :: ne_a
|
||||
integer(c_int), intent(in), value :: ne_b
|
||||
real(c_double), intent(in) :: wr1(n_grid1)
|
||||
real(c_double), intent(in) :: mos_l_in_r(n_grid1,n_mo)
|
||||
real(c_double), intent(in) :: mos_r_in_r(n_grid1,n_mo)
|
||||
real(c_double), intent(in) :: int2_grad1_u12(n_grid1,3,n_mo,n_mo)
|
||||
real(c_double), intent(out) :: no_0e(1)
|
||||
|
||||
end subroutine cutc_no_0e
|
||||
|
||||
! ---
|
||||
|
||||
subroutine cutc_no(n_grid1, n_mo, ne_a, ne_b, &
|
||||
wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, &
|
||||
no_2e, no_1e, no_0e) bind(C, name = "cutc_no")
|
||||
|
||||
import c_int, c_double, c_ptr
|
||||
|
||||
integer(c_int), intent(in), value :: n_grid1
|
||||
integer(c_int), intent(in), value :: n_mo
|
||||
integer(c_int), intent(in), value :: ne_a
|
||||
integer(c_int), intent(in), value :: ne_b
|
||||
real(c_double), intent(in) :: wr1(n_grid1)
|
||||
real(c_double), intent(in) :: mos_l_in_r(n_grid1,n_mo)
|
||||
real(c_double), intent(in) :: mos_r_in_r(n_grid1,n_mo)
|
||||
real(c_double), intent(in) :: int2_grad1_u12(n_grid1,3,n_mo,n_mo)
|
||||
real(c_double), intent(out) :: no_2e(n_mo,n_mo,n_mo,n_mo)
|
||||
real(c_double), intent(out) :: no_1e(n_mo,n_mo)
|
||||
real(c_double), intent(out) :: no_0e(1)
|
||||
|
||||
end subroutine cutc_no
|
||||
|
||||
! ---
|
||||
|
||||
end interface
|
||||
|
||||
end module cutc_module
|
||||
|
96
plugins/local/tc_int/deb_no_0e_gpu.irp.f
Normal file
96
plugins/local/tc_int/deb_no_0e_gpu.irp.f
Normal file
@ -0,0 +1,96 @@
|
||||
|
||||
! ---
|
||||
|
||||
subroutine deb_no_0e_gpu()
|
||||
|
||||
use cutc_module
|
||||
|
||||
implicit none
|
||||
|
||||
integer :: i, j, k, l, ipoint
|
||||
double precision :: acc_thr, err_tot, nrm_tot, err_loc
|
||||
double precision :: noL_0e
|
||||
double precision :: noL_0e_gpu(1)
|
||||
double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:)
|
||||
double precision, allocatable :: tmp(:,:,:,:)
|
||||
double precision, allocatable :: int2_grad1_u12_bimo_t(:,:,:,:)
|
||||
|
||||
|
||||
PROVIDE mo_l_coef mo_r_coef
|
||||
PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp
|
||||
|
||||
|
||||
allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3))
|
||||
print*, ' Reading int2_grad1_u12_ao from ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao'
|
||||
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read")
|
||||
read(11) int2_grad1_u12_ao
|
||||
close(11)
|
||||
|
||||
allocate(tmp(mo_num,mo_num,n_points_final_grid,3))
|
||||
allocate(int2_grad1_u12_bimo_t(n_points_final_grid,3,mo_num,mo_num))
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (ipoint) &
|
||||
!$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp)
|
||||
!$OMP DO SCHEDULE (dynamic)
|
||||
do ipoint = 1, n_points_final_grid
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num)
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num)
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(int2_grad1_u12_ao)
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j, ipoint) &
|
||||
!$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t)
|
||||
!$OMP DO COLLAPSE(2) SCHEDULE (dynamic)
|
||||
do ipoint = 1, n_points_final_grid
|
||||
do i = 1, mo_num
|
||||
do j = 1, mo_num
|
||||
int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1)
|
||||
int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2)
|
||||
int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(tmp)
|
||||
|
||||
! ---
|
||||
|
||||
call cutc_no_0e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_0e_gpu(1))
|
||||
|
||||
! ---
|
||||
|
||||
call provide_no_0e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_0e)
|
||||
|
||||
! ---
|
||||
|
||||
deallocate(int2_grad1_u12_bimo_t)
|
||||
|
||||
print *, 'noL_0e CPU = ', noL_0e
|
||||
print *, 'noL_0e GPU = ', noL_0e_gpu(1)
|
||||
|
||||
err_tot = dabs(noL_0e - noL_0e_gpu(1))
|
||||
nrm_tot = dabs(noL_0e)
|
||||
print *, ' absolute accuracy on noL_0e (%) =', 100.d0 * err_tot / nrm_tot
|
||||
|
||||
return
|
||||
|
||||
end
|
||||
|
||||
! ---
|
||||
|
170
plugins/local/tc_int/deb_no_gpu.irp.f
Normal file
170
plugins/local/tc_int/deb_no_gpu.irp.f
Normal file
@ -0,0 +1,170 @@
|
||||
|
||||
! ---
|
||||
|
||||
subroutine deb_no_gpu()
|
||||
|
||||
use cutc_module
|
||||
|
||||
implicit none
|
||||
|
||||
integer :: i, j, k, l, ipoint
|
||||
double precision :: acc_thr, err_tot, nrm_tot, err_loc
|
||||
double precision :: noL_0e
|
||||
double precision :: noL_0e_gpu(1)
|
||||
double precision, allocatable :: int2_grad1_u12_ao(:,:,:,:)
|
||||
double precision, allocatable :: tmp(:,:,:,:)
|
||||
double precision, allocatable :: int2_grad1_u12_bimo_t(:,:,:,:)
|
||||
double precision, allocatable :: noL_1e (:,:)
|
||||
double precision, allocatable :: noL_1e_gpu(:,:)
|
||||
double precision, allocatable :: noL_2e (:,:,:,:)
|
||||
double precision, allocatable :: noL_2e_gpu(:,:,:,:)
|
||||
|
||||
|
||||
PROVIDE mo_l_coef mo_r_coef
|
||||
PROVIDE mos_l_in_r_array_transp mos_r_in_r_array_transp
|
||||
|
||||
|
||||
allocate(int2_grad1_u12_ao(ao_num,ao_num,n_points_final_grid,3))
|
||||
print*, ' Reading int2_grad1_u12_ao from ', trim(ezfio_filename) // '/work/int2_grad1_u12_ao'
|
||||
open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read")
|
||||
read(11) int2_grad1_u12_ao
|
||||
close(11)
|
||||
|
||||
allocate(tmp(mo_num,mo_num,n_points_final_grid,3))
|
||||
allocate(int2_grad1_u12_bimo_t(n_points_final_grid,3,mo_num,mo_num))
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (ipoint) &
|
||||
!$OMP SHARED (ao_num, mo_num, n_points_final_grid, int2_grad1_u12_ao, tmp)
|
||||
!$OMP DO SCHEDULE (dynamic)
|
||||
do ipoint = 1, n_points_final_grid
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,1), ao_num, tmp(1,1,ipoint,1), mo_num)
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,2), ao_num, tmp(1,1,ipoint,2), mo_num)
|
||||
call ao_to_mo_bi_ortho(int2_grad1_u12_ao(1,1,ipoint,3), ao_num, tmp(1,1,ipoint,3), mo_num)
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(int2_grad1_u12_ao)
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT (NONE) &
|
||||
!$OMP PRIVATE (i, j, ipoint) &
|
||||
!$OMP SHARED (mo_num, n_points_final_grid, tmp, int2_grad1_u12_bimo_t)
|
||||
!$OMP DO COLLAPSE(2) SCHEDULE (dynamic)
|
||||
do ipoint = 1, n_points_final_grid
|
||||
do i = 1, mo_num
|
||||
do j = 1, mo_num
|
||||
int2_grad1_u12_bimo_t(ipoint,1,j,i) = tmp(j,i,ipoint,1)
|
||||
int2_grad1_u12_bimo_t(ipoint,2,j,i) = tmp(j,i,ipoint,2)
|
||||
int2_grad1_u12_bimo_t(ipoint,3,j,i) = tmp(j,i,ipoint,3)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO
|
||||
!$OMP END PARALLEL
|
||||
|
||||
deallocate(tmp)
|
||||
|
||||
! ---
|
||||
|
||||
allocate(noL_2e_gpu(mo_num,mo_num,mo_num,mo_num))
|
||||
allocate(noL_1e_gpu(mo_num,mo_num))
|
||||
|
||||
call cutc_no(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_2e_gpu(1,1,1,1), noL_1e_gpu(1,1), noL_0e_gpu(1))
|
||||
|
||||
! ---
|
||||
|
||||
allocate(noL_2e(mo_num,mo_num,mo_num,mo_num))
|
||||
allocate(noL_1e(mo_num,mo_num))
|
||||
|
||||
call provide_no_2e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_2e(1,1,1,1))
|
||||
|
||||
call provide_no_1e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_1e(1,1))
|
||||
|
||||
call provide_no_0e(n_points_final_grid, mo_num, elec_alpha_num, elec_beta_num, &
|
||||
final_weight_at_r_vector(1), &
|
||||
mos_l_in_r_array_transp(1,1), mos_r_in_r_array_transp(1,1), &
|
||||
int2_grad1_u12_bimo_t(1,1,1,1), noL_0e)
|
||||
|
||||
! ---
|
||||
|
||||
deallocate(int2_grad1_u12_bimo_t)
|
||||
|
||||
acc_thr = 1d-12
|
||||
|
||||
! ---
|
||||
|
||||
err_tot = 0.d0
|
||||
nrm_tot = 0.d0
|
||||
do i = 1, mo_num
|
||||
do j = 1, mo_num
|
||||
do k = 1, mo_num
|
||||
do l = 1, mo_num
|
||||
err_loc = dabs(noL_2e(l,k,j,i) - noL_2e_gpu(l,k,j,i))
|
||||
if(err_loc > acc_thr) then
|
||||
print*, " error on", l, k, j, i
|
||||
print*, " CPU res", noL_2e (l,k,j,i)
|
||||
print*, " GPU res", noL_2e_gpu(l,k,j,i)
|
||||
stop
|
||||
endif
|
||||
err_tot = err_tot + err_loc
|
||||
nrm_tot = nrm_tot + dabs(noL_2e(l,k,j,i))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
print *, ' absolute accuracy on noL_2e (%) =', 100.d0 * err_tot / nrm_tot
|
||||
|
||||
deallocate(noL_2e)
|
||||
deallocate(noL_2e_gpu)
|
||||
|
||||
! ---
|
||||
|
||||
err_tot = 0.d0
|
||||
nrm_tot = 0.d0
|
||||
do k = 1, mo_num
|
||||
do l = 1, mo_num
|
||||
err_loc = dabs(noL_1e(l,k) - noL_1e_gpu(l,k))
|
||||
if(err_loc > acc_thr) then
|
||||
print*, " error on", l, k
|
||||
print*, " CPU res", noL_1e (l,k)
|
||||
print*, " GPU res", noL_1e_gpu(l,k)
|
||||
stop
|
||||
endif
|
||||
err_tot = err_tot + err_loc
|
||||
nrm_tot = nrm_tot + dabs(noL_1e(l,k))
|
||||
enddo
|
||||
enddo
|
||||
print *, ' absolute accuracy on noL_1e (%) =', 100.d0 * err_tot / nrm_tot
|
||||
|
||||
deallocate(noL_1e)
|
||||
deallocate(noL_1e_gpu)
|
||||
|
||||
! ---
|
||||
|
||||
print *, 'noL_0e CPU = ', noL_0e
|
||||
print *, 'noL_0e GPU = ', noL_0e_gpu(1)
|
||||
|
||||
err_tot = dabs(noL_0e - noL_0e_gpu(1))
|
||||
nrm_tot = dabs(noL_0e)
|
||||
print *, ' absolute accuracy on noL_0e (%) =', 100.d0 * err_tot / nrm_tot
|
||||
|
||||
|
||||
return
|
||||
|
||||
end
|
||||
|
||||
! ---
|
||||
|
||||
|
@ -41,9 +41,13 @@ subroutine main()
|
||||
!call deb_no_2e_gpu_tmp()
|
||||
!call deb_no_2e_gpu()
|
||||
|
||||
call deb_no_1e_gpu_tmp()
|
||||
!call deb_no_1e_gpu_tmp()
|
||||
!call deb_no_1e_gpu()
|
||||
|
||||
!call deb_no_0e_gpu()
|
||||
|
||||
call deb_no_gpu()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
@ -3,12 +3,6 @@
|
||||
|
||||
subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r, int2_grad1_u12, noL_0e)
|
||||
|
||||
BEGIN_DOC
|
||||
!
|
||||
! < Phi_left | L | Phi_right >
|
||||
!
|
||||
END_DOC
|
||||
|
||||
implicit none
|
||||
|
||||
integer, intent(in) :: n_grid, n_mo
|
||||
@ -22,44 +16,47 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
integer :: i, j, k, ipoint
|
||||
double precision :: t0, t1
|
||||
double precision, allocatable :: tmp(:)
|
||||
double precision, allocatable :: tmp_L(:,:), tmp_R(:,:)
|
||||
double precision, allocatable :: tmp_M(:,:), tmp_S(:), tmp_O(:), tmp_J(:,:)
|
||||
double precision, allocatable :: tmp_M_priv(:,:), tmp_S_priv(:), tmp_O_priv(:), tmp_J_priv(:,:)
|
||||
double precision, allocatable :: tmpL(:,:), tmpR(:,:)
|
||||
double precision, allocatable :: tmpM(:,:), tmpS(:), tmpO(:), tmpJ(:,:)
|
||||
double precision, allocatable :: tmpM_priv(:,:), tmpS_priv(:), tmpO_priv(:), tmpJ_priv(:,:)
|
||||
|
||||
|
||||
call wall_time(t0)
|
||||
|
||||
|
||||
if(ne_a .eq. ne_b) then
|
||||
|
||||
allocate(tmp(ne_b))
|
||||
allocate(tmp_L(n_grid,3), tmp_R(n_grid,3))
|
||||
allocate(tmpL(n_grid,3), tmpR(n_grid,3))
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, wr1, &
|
||||
!$OMP int2_grad1_u12, tmp)
|
||||
|
||||
!$OMP DO
|
||||
do j = 1, ne_b
|
||||
|
||||
tmp_L = 0.d0
|
||||
tmp_R = 0.d0
|
||||
tmpL = 0.d0
|
||||
tmpR = 0.d0
|
||||
do i = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
|
||||
tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
tmp(j) = 0.d0
|
||||
do ipoint = 1, n_grid
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3))
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3))
|
||||
enddo
|
||||
enddo ! j
|
||||
!$OMP END DO
|
||||
@ -68,150 +65,149 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
noL_0e = -2.d0 * sum(tmp)
|
||||
|
||||
deallocate(tmp)
|
||||
deallocate(tmp_L, tmp_R)
|
||||
deallocate(tmpL, tmpR)
|
||||
|
||||
! ---
|
||||
|
||||
allocate(tmp_O(n_grid), tmp_J(n_grid,3))
|
||||
tmp_O = 0.d0
|
||||
tmp_J = 0.d0
|
||||
allocate(tmpO(n_grid), tmpJ(n_grid,3))
|
||||
tmpO = 0.d0
|
||||
tmpJ = 0.d0
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp_O, tmp_J)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmpO, tmpJ)
|
||||
|
||||
allocate(tmp_O_priv(n_grid), tmp_J_priv(n_grid,3))
|
||||
tmp_O_priv = 0.d0
|
||||
tmp_J_priv = 0.d0
|
||||
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
|
||||
tmpO_priv = 0.d0
|
||||
tmpJ_priv = 0.d0
|
||||
|
||||
!$OMP DO
|
||||
do i = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
|
||||
tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
|
||||
tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
|
||||
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
|
||||
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
|
||||
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP CRITICAL
|
||||
tmp_O = tmp_O + tmp_O_priv
|
||||
tmp_J = tmp_J + tmp_J_priv
|
||||
tmpO = tmpO + tmpO_priv
|
||||
tmpJ = tmpJ + tmpJ_priv
|
||||
!$OMP END CRITICAL
|
||||
|
||||
deallocate(tmp_O_priv, tmp_J_priv)
|
||||
deallocate(tmpO_priv, tmpJ_priv)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
allocate(tmp_M(n_grid,3), tmp_S(n_grid))
|
||||
tmp_M = 0.d0
|
||||
tmp_S = 0.d0
|
||||
allocate(tmpM(n_grid,3), tmpS(n_grid))
|
||||
tmpM = 0.d0
|
||||
tmpS = 0.d0
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp_M, tmp_S)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
|
||||
!$OMP SHARED(ne_b, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmpM, tmpS)
|
||||
|
||||
allocate(tmp_M_priv(n_grid,3), tmp_S_priv(n_grid))
|
||||
tmp_M_priv = 0.d0
|
||||
tmp_S_priv = 0.d0
|
||||
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
|
||||
tmpM_priv = 0.d0
|
||||
tmpS_priv = 0.d0
|
||||
|
||||
!$OMP DO COLLAPSE(2)
|
||||
do i = 1, ne_b
|
||||
do j = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
|
||||
tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP CRITICAL
|
||||
tmp_M = tmp_M + tmp_M_priv
|
||||
tmp_S = tmp_S + tmp_S_priv
|
||||
tmpM = tmpM + tmpM_priv
|
||||
tmpS = tmpS + tmpS_priv
|
||||
!$OMP END CRITICAL
|
||||
|
||||
deallocate(tmp_M_priv, tmp_S_priv)
|
||||
deallocate(tmpM_priv, tmpS_priv)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
allocate(tmp(n_grid))
|
||||
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint)
|
||||
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1)*tmpJ(ipoint,1) + tmpJ(ipoint,2)*tmpJ(ipoint,2) + tmpJ(ipoint,3)*tmpJ(ipoint,3)) - tmpS(ipoint)
|
||||
|
||||
tmp(ipoint) = wr1(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) &
|
||||
- 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) &
|
||||
+ tmp_J(ipoint,2) * tmp_M(ipoint,2) &
|
||||
+ tmp_J(ipoint,3) * tmp_M(ipoint,3)))
|
||||
tmp(ipoint) = wr1(ipoint) * ( tmpO(ipoint) * tmpS(ipoint) - 2.d0 * ( tmpJ(ipoint,1) * tmpM(ipoint,1) &
|
||||
+ tmpJ(ipoint,2) * tmpM(ipoint,2) &
|
||||
+ tmpJ(ipoint,3) * tmpM(ipoint,3) ) )
|
||||
enddo
|
||||
|
||||
noL_0e = noL_0e -2.d0 * (sum(tmp))
|
||||
noL_0e = noL_0e - 2.d0 * (sum(tmp))
|
||||
|
||||
deallocate(tmp)
|
||||
|
||||
else
|
||||
|
||||
allocate(tmp(ne_a))
|
||||
allocate(tmp_L(n_grid,3), tmp_R(n_grid,3))
|
||||
allocate(tmpL(n_grid,3), tmpR(n_grid,3))
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp, wr1)
|
||||
|
||||
!$OMP DO
|
||||
do j = 1, ne_b
|
||||
|
||||
tmp_L = 0.d0
|
||||
tmp_R = 0.d0
|
||||
tmpL = 0.d0
|
||||
tmpR = 0.d0
|
||||
do i = ne_b+1, ne_a
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_L(ipoint,1) = tmp_L(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,2) = tmp_L(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,3) = tmp_L(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,1) = tmpL(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,2) = tmpL(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,3) = tmpL(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
|
||||
tmp_R(ipoint,1) = tmp_R(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,2) = tmp_R(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,3) = tmp_R(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,1) = tmpR(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,2) = tmpR(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,3) = tmpR(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
tmp(j) = 0.d0
|
||||
do ipoint = 1, n_grid
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3))
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3))
|
||||
enddo
|
||||
|
||||
do i = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
|
||||
tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
do ipoint = 1, n_grid
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3))
|
||||
tmp(j) = tmp(j) + wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3))
|
||||
enddo
|
||||
enddo ! j
|
||||
!$OMP END DO
|
||||
@ -219,33 +215,33 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
|
||||
! ---
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmp_L, tmp_R) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(j, i, ipoint, tmpL, tmpR) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp, wr1)
|
||||
|
||||
!$OMP DO
|
||||
do j = ne_b+1, ne_a
|
||||
|
||||
tmp_L = 0.d0
|
||||
tmp_R = 0.d0
|
||||
tmpL = 0.d0
|
||||
tmpR = 0.d0
|
||||
do i = 1, ne_a
|
||||
do ipoint = 1, n_grid
|
||||
tmp_L(ipoint,1) = tmp_L(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,2) = tmp_L(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmp_L(ipoint,3) = tmp_L(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,1) = tmpL(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,2) = tmpL(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i)
|
||||
tmpL(ipoint,3) = tmpL(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i)
|
||||
|
||||
tmp_R(ipoint,1) = tmp_R(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,2) = tmp_R(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_R(ipoint,3) = tmp_R(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,1) = tmpR(ipoint,1) + int2_grad1_u12(ipoint,1,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,2) = tmpR(ipoint,2) + int2_grad1_u12(ipoint,2,i,j) * mos_r_in_r(ipoint,i)
|
||||
tmpR(ipoint,3) = tmpR(ipoint,3) + int2_grad1_u12(ipoint,3,i,j) * mos_r_in_r(ipoint,i)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
tmp(j) = 0.d0
|
||||
do ipoint = 1, n_grid
|
||||
tmp(j) = tmp(j) + 0.5d0 * wr1(ipoint) * (tmp_L(ipoint,1)*tmp_R(ipoint,1) + tmp_L(ipoint,2)*tmp_R(ipoint,2) + tmp_L(ipoint,3)*tmp_R(ipoint,3))
|
||||
tmp(j) = tmp(j) + 0.5d0 * wr1(ipoint) * (tmpL(ipoint,1)*tmpR(ipoint,1) + tmpL(ipoint,2)*tmpR(ipoint,2) + tmpL(ipoint,3)*tmpR(ipoint,3))
|
||||
enddo
|
||||
enddo ! j
|
||||
!$OMP END DO
|
||||
@ -254,32 +250,32 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
noL_0e = -2.d0 * sum(tmp)
|
||||
|
||||
deallocate(tmp)
|
||||
deallocate(tmp_L, tmp_R)
|
||||
deallocate(tmpL, tmpR)
|
||||
|
||||
! ---
|
||||
|
||||
allocate(tmp_O(n_grid), tmp_J(n_grid,3))
|
||||
tmp_O = 0.d0
|
||||
tmp_J = 0.d0
|
||||
allocate(tmpO(n_grid), tmpJ(n_grid,3))
|
||||
tmpO = 0.d0
|
||||
tmpJ = 0.d0
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, ipoint, tmp_O_priv, tmp_J_priv) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp_O, tmp_J)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, ipoint, tmpO_priv, tmpJ_priv) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmpO, tmpJ)
|
||||
|
||||
allocate(tmp_O_priv(n_grid), tmp_J_priv(n_grid,3))
|
||||
tmp_O_priv = 0.d0
|
||||
tmp_J_priv = 0.d0
|
||||
allocate(tmpO_priv(n_grid), tmpJ_priv(n_grid,3))
|
||||
tmpO_priv = 0.d0
|
||||
tmpJ_priv = 0.d0
|
||||
|
||||
!$OMP DO
|
||||
do i = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
|
||||
tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
|
||||
tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
|
||||
tmpO_priv(ipoint) = tmpO_priv(ipoint) + mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + int2_grad1_u12(ipoint,1,i,i)
|
||||
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + int2_grad1_u12(ipoint,2,i,i)
|
||||
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + int2_grad1_u12(ipoint,3,i,i)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
@ -287,49 +283,49 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
!$OMP DO
|
||||
do i = ne_b+1, ne_a
|
||||
do ipoint = 1, n_grid
|
||||
tmp_O_priv(ipoint) = tmp_O_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmp_J_priv(ipoint,1) = tmp_J_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i)
|
||||
tmp_J_priv(ipoint,2) = tmp_J_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i)
|
||||
tmp_J_priv(ipoint,3) = tmp_J_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i)
|
||||
tmpO_priv(ipoint) = tmpO_priv(ipoint) + 0.5d0 * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,i)
|
||||
tmpJ_priv(ipoint,1) = tmpJ_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,i)
|
||||
tmpJ_priv(ipoint,2) = tmpJ_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,i)
|
||||
tmpJ_priv(ipoint,3) = tmpJ_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,i)
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP CRITICAL
|
||||
tmp_O = tmp_O + tmp_O_priv
|
||||
tmp_J = tmp_J + tmp_J_priv
|
||||
tmpO = tmpO + tmpO_priv
|
||||
tmpJ = tmpJ + tmpJ_priv
|
||||
!$OMP END CRITICAL
|
||||
|
||||
deallocate(tmp_O_priv, tmp_J_priv)
|
||||
deallocate(tmpO_priv, tmpJ_priv)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
! ---
|
||||
|
||||
allocate(tmp_M(n_grid,3), tmp_S(n_grid))
|
||||
tmp_M = 0.d0
|
||||
tmp_S = 0.d0
|
||||
allocate(tmpM(n_grid,3), tmpS(n_grid))
|
||||
tmpM = 0.d0
|
||||
tmpS = 0.d0
|
||||
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, j, ipoint, tmp_M_priv, tmp_S_priv) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmp_M, tmp_S)
|
||||
!$OMP PARALLEL &
|
||||
!$OMP DEFAULT(NONE) &
|
||||
!$OMP PRIVATE(i, j, ipoint, tmpM_priv, tmpS_priv) &
|
||||
!$OMP SHARED(ne_b, ne_a, n_grid, &
|
||||
!$OMP mos_l_in_r, mos_r_in_r, &
|
||||
!$OMP int2_grad1_u12, tmpM, tmpS)
|
||||
|
||||
allocate(tmp_M_priv(n_grid,3), tmp_S_priv(n_grid))
|
||||
tmp_M_priv = 0.d0
|
||||
tmp_S_priv = 0.d0
|
||||
allocate(tmpM_priv(n_grid,3), tmpS_priv(n_grid))
|
||||
tmpM_priv = 0.d0
|
||||
tmpS_priv = 0.d0
|
||||
|
||||
!$OMP DO COLLAPSE(2)
|
||||
do i = 1, ne_b
|
||||
do j = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
|
||||
tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
enddo
|
||||
@ -342,17 +338,17 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
do j = 1, ne_b
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
|
||||
tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * mos_l_in_r(ipoint,j) * mos_r_in_r(ipoint,i)
|
||||
|
||||
tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
tmpS_priv(ipoint) = tmpS_priv(ipoint) + int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
@ -363,39 +359,38 @@ subroutine provide_no_0e(n_grid, n_mo, ne_a, ne_b, wr1, mos_l_in_r, mos_r_in_r,
|
||||
do j = ne_b+1, ne_a
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_M_priv(ipoint,1) = tmp_M_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,2) = tmp_M_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmp_M_priv(ipoint,3) = tmp_M_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,1) = tmpM_priv(ipoint,1) + 0.5d0 * int2_grad1_u12(ipoint,1,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,2) = tmpM_priv(ipoint,2) + 0.5d0 * int2_grad1_u12(ipoint,2,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
tmpM_priv(ipoint,3) = tmpM_priv(ipoint,3) + 0.5d0 * int2_grad1_u12(ipoint,3,j,i) * mos_l_in_r(ipoint,i) * mos_r_in_r(ipoint,j)
|
||||
|
||||
tmp_S_priv(ipoint) = tmp_S_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
tmpS_priv(ipoint) = tmpS_priv(ipoint) + 0.5d0 * int2_grad1_u12(ipoint,1,i,j) * int2_grad1_u12(ipoint,1,j,i) &
|
||||
+ 0.5d0 * int2_grad1_u12(ipoint,2,i,j) * int2_grad1_u12(ipoint,2,j,i) &
|
||||
+ 0.5d0 * int2_grad1_u12(ipoint,3,i,j) * int2_grad1_u12(ipoint,3,j,i)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$OMP END DO NOWAIT
|
||||
|
||||
!$OMP CRITICAL
|
||||
tmp_M = tmp_M + tmp_M_priv
|
||||
tmp_S = tmp_S + tmp_S_priv
|
||||
tmpM = tmpM + tmpM_priv
|
||||
tmpS = tmpS + tmpS_priv
|
||||
!$OMP END CRITICAL
|
||||
|
||||
deallocate(tmp_M_priv, tmp_S_priv)
|
||||
deallocate(tmpM_priv, tmpS_priv)
|
||||
!$OMP END PARALLEL
|
||||
|
||||
allocate(tmp(n_grid))
|
||||
|
||||
do ipoint = 1, n_grid
|
||||
|
||||
tmp_S(ipoint) = 2.d0 * (tmp_J(ipoint,1)*tmp_J(ipoint,1) + tmp_J(ipoint,2)*tmp_J(ipoint,2) + tmp_J(ipoint,3)*tmp_J(ipoint,3)) - tmp_S(ipoint)
|
||||
tmpS(ipoint) = 2.d0 * (tmpJ(ipoint,1)*tmpJ(ipoint,1) + tmpJ(ipoint,2)*tmpJ(ipoint,2) + tmpJ(ipoint,3)*tmpJ(ipoint,3)) - tmpS(ipoint)
|
||||
|
||||
tmp(ipoint) = wr1(ipoint) * ( tmp_O(ipoint) * tmp_S(ipoint) &
|
||||
- 2.d0 * ( tmp_J(ipoint,1) * tmp_M(ipoint,1) &
|
||||
+ tmp_J(ipoint,2) * tmp_M(ipoint,2) &
|
||||
+ tmp_J(ipoint,3) * tmp_M(ipoint,3)))
|
||||
tmp(ipoint) = wr1(ipoint) * ( tmpO(ipoint) * tmpS(ipoint) - 2.d0 * ( tmpJ(ipoint,1) * tmpM(ipoint,1) &
|
||||
+ tmpJ(ipoint,2) * tmpM(ipoint,2) &
|
||||
+ tmpJ(ipoint,3) * tmpM(ipoint,3) ) )
|
||||
enddo
|
||||
|
||||
noL_0e = noL_0e -2.d0 * (sum(tmp))
|
||||
noL_0e = noL_0e - 2.d0 * (sum(tmp))
|
||||
|
||||
deallocate(tmp)
|
||||
|
||||
|
@ -120,12 +120,12 @@ subroutine do_work_on_gpu()
|
||||
call wall_time(cuda_time0)
|
||||
print*, ' start CUDA kernel'
|
||||
|
||||
call cutc_int_c(nxBlocks, nyBlocks, nzBlocks, blockxSize, blockySize, blockzSize, &
|
||||
n_points_final_grid, n_points_extra_final_grid, ao_num, nucl_num, jBH_size, &
|
||||
final_grid_points, final_weight_at_r_vector, &
|
||||
final_grid_points_extra, final_weight_at_r_vector_extra, &
|
||||
rn, aos_data1, aos_data2, jBH_c, jBH_m, jBH_n, jBH_o, &
|
||||
int2_grad1_u12_ao, int_2e_ao)
|
||||
call cutc_int(nxBlocks, nyBlocks, nzBlocks, blockxSize, blockySize, blockzSize, &
|
||||
n_points_final_grid, n_points_extra_final_grid, ao_num, nucl_num, jBH_size, &
|
||||
final_grid_points, final_weight_at_r_vector, &
|
||||
final_grid_points_extra, final_weight_at_r_vector_extra, &
|
||||
rn, aos_data1, aos_data2, jBH_c, jBH_m, jBH_n, jBH_o, &
|
||||
int2_grad1_u12_ao, int_2e_ao)
|
||||
|
||||
call wall_time(cuda_time1)
|
||||
print*, ' wall time for CUDA kernel (min) = ', (cuda_time1-cuda_time0) / 60.d0
|
||||
|
Loading…
Reference in New Issue
Block a user