10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-07-03 18:06:04 +02:00

DGEMM added and tested for Be(VDZ)

This commit is contained in:
Abdallah Ammar 2023-04-21 22:22:25 +02:00
parent 333d8a551c
commit 195c7402cf
4 changed files with 97 additions and 52 deletions

View File

@ -232,8 +232,6 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g
PROVIDE j1b_type PROVIDE j1b_type
if(j1b_type .eq. 3) then
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
tmp1 = v_1b(ipoint) tmp1 = v_1b(ipoint)
tmp1 = tmp1 * tmp1 tmp1 = tmp1 * tmp1
@ -244,25 +242,23 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g
enddo enddo
enddo enddo
else !if(j1b_type .eq. 0) then
! grad12_j12 = 0.d0
grad12_j12 = 0.d0 ! do ipoint = 1, n_points_final_grid
do ipoint = 1, n_points_final_grid ! r(1) = final_grid_points(1,ipoint)
r(1) = final_grid_points(1,ipoint) ! r(2) = final_grid_points(2,ipoint)
r(2) = final_grid_points(2,ipoint) ! r(3) = final_grid_points(3,ipoint)
r(3) = final_grid_points(3,ipoint) ! do j = 1, ao_num
do j = 1, ao_num ! do i = 1, ao_num
do i = 1, ao_num ! do igauss = 1, n_max_fit_slat
do igauss = 1, n_max_fit_slat ! delta = expo_gauss_1_erf_x_2(igauss)
delta = expo_gauss_1_erf_x_2(igauss) ! coef = coef_gauss_1_erf_x_2(igauss)
coef = coef_gauss_1_erf_x_2(igauss) ! grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) ! enddo
enddo ! enddo
enddo ! enddo
enddo ! enddo
enddo !endif
endif
call wall_time(time1) call wall_time(time1)
print*, ' Wall time for grad12_j12 = ', time1 - time0 print*, ' Wall time for grad12_j12 = ', time1 - time0
@ -398,7 +394,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
tc_grad_square_ao = 0.d0 tc_grad_square_ao = 0.d0
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & , int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid &
, 1.d0, tc_grad_square_ao, ao_num*ao_num) , 0.d0, tc_grad_square_ao, ao_num*ao_num)
deallocate(b_mat) deallocate(b_mat)
call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num)

View File

@ -32,8 +32,15 @@
double precision :: v1b_r1, v1b_r2, u2b_r12 double precision :: v1b_r1, v1b_r2, u2b_r12
double precision :: grad1_v1b(3), grad1_u2b(3) double precision :: grad1_v1b(3), grad1_u2b(3)
double precision :: dx, dy, dz
double precision, external :: j12_mu, j1b_nucl double precision, external :: j12_mu, j1b_nucl
!$OMP PARALLEL &
!$OMP DEFAULT (NONE) &
!$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) &
!$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, &
!$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num)
!$OMP DO SCHEDULE (static)
do ipoint = 1, n_points_final_grid ! r1 do ipoint = 1, n_points_final_grid ! r1
r1(1) = final_grid_points(1,ipoint) r1(1) = final_grid_points(1,ipoint)
@ -41,7 +48,7 @@
r1(3) = final_grid_points(3,ipoint) r1(3) = final_grid_points(3,ipoint)
v1b_r1 = j1b_nucl(r1) v1b_r1 = j1b_nucl(r1)
call grad1_j1b_nuc(r1, grad1_v1b) call grad1_j1b_nucl(r1, grad1_v1b)
do jpoint = 1, n_points_extra_final_grid ! r2 do jpoint = 1, n_points_extra_final_grid ! r2
@ -53,15 +60,19 @@
u2b_r12 = j12_mu(r1, r2) u2b_r12 = j12_mu(r1, r2)
call grad1_j12_mu(r1, r2, grad1_u2b) call grad1_j12_mu(r1, r2, grad1_u2b)
grad1_u12_num(jpoint,ipoint,1) = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2
grad1_u12_num(jpoint,ipoint,2) = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2
grad1_u12_num(jpoint,ipoint,3) = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2
grad1_u12_squared_num(jpoint,ipoint) = ( grad1_u12_num(jpoint,ipoint,1) * grad1_u12_num(jpoint,ipoint,1) & grad1_u12_num(jpoint,ipoint,1) = dx
+ grad1_u12_num(jpoint,ipoint,2) * grad1_u12_num(jpoint,ipoint,2) & grad1_u12_num(jpoint,ipoint,2) = dy
+ grad1_u12_num(jpoint,ipoint,3) * grad1_u12_num(jpoint,ipoint,3) ) grad1_u12_num(jpoint,ipoint,3) = dz
grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz
enddo enddo
enddo enddo
!$OMP END DO
!$OMP END PARALLEL
else else
@ -170,7 +181,7 @@ end function j1b_nucl
! --- ! ---
subroutine grad1_j1b_nuc(r, grad) subroutine grad1_j1b_nucl(r, grad)
implicit none implicit none
double precision, intent(in) :: r(3) double precision, intent(in) :: r(3)
@ -228,7 +239,7 @@ subroutine grad1_j1b_nuc(r, grad)
endif endif
return return
end subroutine grad1_j1b_nuc end subroutine grad1_j1b_nucl
! --- ! ---

View File

@ -247,8 +247,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
do m = 1, 3 do m = 1, 3
call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
, 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) , 0.d0, tc_grad_and_lapl_ao, ao_num*ao_num)
enddo enddo
deallocate(b_mat) deallocate(b_mat)

View File

@ -50,10 +50,9 @@
! --- ! ---
PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
if(.not.read_tc_integ) then if(.not.read_tc_integ) then
call wall_time(time0) call wall_time(time0)
PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
int2_grad1_u12_ao = 0.d0 int2_grad1_u12_ao = 0.d0
! TODO OPENMP ! TODO OPENMP
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
@ -133,15 +132,38 @@
do m = 1, 3 do m = 1, 3
call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 &
, tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid &
, 1.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num)
enddo enddo
!! DEBUG
!PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b
!int2_grad1_u12_ao = 0.d0
!do ipoint = 1, n_points_final_grid
! x = final_grid_points(1,ipoint)
! y = final_grid_points(2,ipoint)
! z = final_grid_points(3,ipoint)
! tmp0 = 0.5d0 * v_1b(ipoint)
! tmp_x = v_1b_grad(1,ipoint)
! tmp_y = v_1b_grad(2,ipoint)
! tmp_z = v_1b_grad(3,ipoint)
! do j = 1, ao_num
! do i = 1, ao_num
! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
! int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
! int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
! int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
! enddo
! enddo
!enddo
! these dgemm are equivalen to ! these dgemm are equivalen to
!!$OMP PARALLEL & !!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) & !!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (j, i, ipoint, jpoint, w) & !!$OMP PRIVATE (j, i, ipoint, jpoint, w) &
!!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, &
!!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, &
!!$OMP aos_in_r_array_extra_transp, grad1_u12_num,tmp) !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp)
!!$OMP DO SCHEDULE (static) !!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid !do ipoint = 1, n_points_final_grid
! do j = 1, ao_num ! do j = 1, ao_num
@ -165,22 +187,42 @@
call wall_time(time0) call wall_time(time0)
int2_grad1_u12_squared_ao = 0.d0 int2_grad1_u12_squared_ao = 0.d0
call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 &
, tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid &
, 1.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) , 0.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num)
!! DEBUG
!PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12
!int2_grad1_u12_squared_ao = 0.d0
!!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (i, j, ipoint) &
!!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
!!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid
! do j = 1, ao_num
! do i = 1, ao_num
! int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint)
! enddo
! enddo
!enddo
!!$OMP END DO
!!$OMP END PARALLEL
!call wall_time(time1)
!! this dgemm is equivalen to !! this dgemm is equivalen to
!!$OMP PARALLEL & !!$OMP PARALLEL &
!!$OMP DEFAULT (NONE) & !!$OMP DEFAULT (NONE) &
!!$OMP PRIVATE (i, j, ipoint, jpoint, w) & !!$OMP PRIVATE (i, j, ipoint, jpoint, w) &
!!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, & !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, &
!!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, &
!!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num,tmp) !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp)
!!$OMP DO SCHEDULE (static) !!$OMP DO SCHEDULE (static)
!do ipoint = 1, n_points_final_grid !do ipoint = 1, n_points_final_grid
! do j = 1, ao_num ! do j = 1, ao_num
! do i = 1, ao_num ! do i = 1, ao_num
! do jpoint = 1, n_points_extra_final_grid ! do jpoint = 1, n_points_extra_final_grid
! w = tmp(jpoint,i,j) ! w = 0.5d0 * tmp(jpoint,i,j)
! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) ! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint)
! enddo ! enddo
! enddo ! enddo
@ -208,9 +250,6 @@
call ezfio_set_tc_keywords_io_tc_integ('Read') call ezfio_set_tc_keywords_io_tc_integ('Read')
endif endif
call wall_time(time1)
print*, ' wall time for int2_grad1_u12_ao & int2_grad1_u12_squared_ao = ', time1 - time0
END_PROVIDER END_PROVIDER
! --- ! ---