10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-22 20:34:58 +01:00

DGEMM added and tested for Be(VDZ)

This commit is contained in:
Abdallah Ammar 2023-04-21 22:22:25 +02:00
parent 333d8a551c
commit 195c7402cf
4 changed files with 97 additions and 52 deletions

View File

@ -232,37 +232,33 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g
PROVIDE j1b_type
if(j1b_type .eq. 3) then
do ipoint = 1, n_points_final_grid
tmp1 = v_1b(ipoint)
tmp1 = tmp1 * tmp1
do j = 1, ao_num
do i = 1, ao_num
grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
enddo
do ipoint = 1, n_points_final_grid
tmp1 = v_1b(ipoint)
tmp1 = tmp1 * tmp1
do j = 1, ao_num
do i = 1, ao_num
grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
enddo
enddo
enddo
else
grad12_j12 = 0.d0
do ipoint = 1, n_points_final_grid
r(1) = final_grid_points(1,ipoint)
r(2) = final_grid_points(2,ipoint)
r(3) = final_grid_points(3,ipoint)
do j = 1, ao_num
do i = 1, ao_num
do igauss = 1, n_max_fit_slat
delta = expo_gauss_1_erf_x_2(igauss)
coef = coef_gauss_1_erf_x_2(igauss)
grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
enddo
enddo
enddo
enddo
endif
!if(j1b_type .eq. 0) then
! grad12_j12 = 0.d0
! do ipoint = 1, n_points_final_grid
! r(1) = final_grid_points(1,ipoint)
! r(2) = final_grid_points(2,ipoint)
! r(3) = final_grid_points(3,ipoint)
! do j = 1, ao_num
! do i = 1, ao_num
! do igauss = 1, n_max_fit_slat
! delta = expo_gauss_1_erf_x_2(igauss)
! coef = coef_gauss_1_erf_x_2(igauss)
! grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
! enddo
! enddo
! enddo
! enddo
!endif
call wall_time(time1)
print*, ' Wall time for grad12_j12 = ', time1 - time0
@ -398,7 +394,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
tc_grad_square_ao = 0.d0
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid &
, 1.d0, tc_grad_square_ao, ao_num*ao_num)
, 0.d0, tc_grad_square_ao, ao_num*ao_num)
deallocate(b_mat)
call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num)

View File

@ -1,7 +1,7 @@
! ---
BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)]
BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)]
&BEGIN_PROVIDER [ double precision, grad1_u12_squared_num, (n_points_extra_final_grid, n_points_final_grid)]
BEGIN_DOC
@ -32,8 +32,15 @@
double precision :: v1b_r1, v1b_r2, u2b_r12
double precision :: grad1_v1b(3), grad1_u2b(3)
double precision :: dx, dy, dz
double precision, external :: j12_mu, j1b_nucl
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) &
!$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, &
!$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num)
!$OMP DO SCHEDULE (static)
do ipoint = 1, n_points_final_grid ! r1
r1(1) = final_grid_points(1,ipoint)
@ -41,7 +48,7 @@
r1(3) = final_grid_points(3,ipoint)
v1b_r1 = j1b_nucl(r1)
call grad1_j1b_nuc(r1, grad1_v1b)
call grad1_j1b_nucl(r1, grad1_v1b)
do jpoint = 1, n_points_extra_final_grid ! r2
@ -53,15 +60,19 @@
u2b_r12 = j12_mu(r1, r2)
call grad1_j12_mu(r1, r2, grad1_u2b)
grad1_u12_num(jpoint,ipoint,1) = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2
grad1_u12_num(jpoint,ipoint,2) = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2
grad1_u12_num(jpoint,ipoint,3) = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2
dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2
dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2
dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2
grad1_u12_squared_num(jpoint,ipoint) = ( grad1_u12_num(jpoint,ipoint,1) * grad1_u12_num(jpoint,ipoint,1) &
+ grad1_u12_num(jpoint,ipoint,2) * grad1_u12_num(jpoint,ipoint,2) &
+ grad1_u12_num(jpoint,ipoint,3) * grad1_u12_num(jpoint,ipoint,3) )
grad1_u12_num(jpoint,ipoint,1) = dx
grad1_u12_num(jpoint,ipoint,2) = dy
grad1_u12_num(jpoint,ipoint,3) = dz
grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
else
@ -170,7 +181,7 @@ end function j1b_nucl
! ---
subroutine grad1_j1b_nuc(r, grad)
subroutine grad1_j1b_nucl(r, grad)
implicit none
double precision, intent(in) :: r(3)
@ -228,7 +239,7 @@ subroutine grad1_j1b_nuc(r, grad)
endif
return
end subroutine grad1_j1b_nuc
end subroutine grad1_j1b_nucl
! ---

View File

@ -247,8 +247,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
do m = 1, 3
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
, 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num)
, 0.d0, tc_grad_and_lapl_ao, ao_num*ao_num)
enddo
deallocate(b_mat)

View File

@ -50,10 +50,9 @@
! ---
PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
if(.not.read_tc_integ) then
call wall_time(time0)
PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
int2_grad1_u12_ao = 0.d0
! TODO OPENMP
do ipoint = 1, n_points_final_grid
@ -133,15 +132,38 @@
do m = 1, 3
call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 &
, tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid &
, 1.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num)
, 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num)
enddo
!! DEBUG
!PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
!int2_grad1_u12_ao = 0.d0
!do ipoint = 1, n_points_final_grid
! x = final_grid_points(1,ipoint)
! y = final_grid_points(2,ipoint)
! z = final_grid_points(3,ipoint)
! tmp0 = 0.5d0 * v_1b(ipoint)
! tmp_x = v_1b_grad(1,ipoint)
! tmp_y = v_1b_grad(2,ipoint)
! tmp_z = v_1b_grad(3,ipoint)
! do j = 1, ao_num
! do i = 1, ao_num
! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
! int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
! int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
! int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
! enddo
! enddo
!enddo
! these dgemm are equivalen to
!!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (j, i, ipoint, jpoint, w) &
!!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, &
!!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, &
!!$OMP aos_in_r_array_extra_transp, grad1_u12_num,tmp)
!!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp)
!!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid
! do j = 1, ao_num
@ -165,22 +187,42 @@
call wall_time(time0)
int2_grad1_u12_squared_ao = 0.d0
call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 &
call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 &
, tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid &
, 1.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num)
, 0.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num)
!! DEBUG
!PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12
!int2_grad1_u12_squared_ao = 0.d0
!!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (i, j, ipoint) &
!!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
!!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid
! do j = 1, ao_num
! do i = 1, ao_num
! int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint)
! enddo
! enddo
!enddo
!!$OMP END DO
!!$OMP END PARALLEL
!call wall_time(time1)
!! this dgemm is equivalen to
!!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (i, j, ipoint, jpoint, w) &
!!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, &
!!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, &
!!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num,tmp)
!!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp)
!!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid
! do j = 1, ao_num
! do i = 1, ao_num
! do jpoint = 1, n_points_extra_final_grid
! w = tmp(jpoint,i,j)
! w = 0.5d0 * tmp(jpoint,i,j)
! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint)
! enddo
! enddo
@ -208,9 +250,6 @@
call ezfio_set_tc_keywords_io_tc_integ('Read')
endif
call wall_time(time1)
print*, ' wall time for int2_grad1_u12_ao & int2_grad1_u12_squared_ao = ', time1 - time0
END_PROVIDER
! ---