diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 558ca268..8b801f9d 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -232,37 +232,33 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g PROVIDE j1b_type - if(j1b_type .eq. 3) then - - do ipoint = 1, n_points_final_grid - tmp1 = v_1b(ipoint) - tmp1 = tmp1 * tmp1 - do j = 1, ao_num - do i = 1, ao_num - grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) - enddo + do ipoint = 1, n_points_final_grid + tmp1 = v_1b(ipoint) + tmp1 = tmp1 * tmp1 + do j = 1, ao_num + do i = 1, ao_num + grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) enddo enddo + enddo - else - - grad12_j12 = 0.d0 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - do igauss = 1, n_max_fit_slat - delta = expo_gauss_1_erf_x_2(igauss) - coef = coef_gauss_1_erf_x_2(igauss) - grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) - enddo - enddo - enddo - enddo - - endif + !if(j1b_type .eq. 0) then + ! grad12_j12 = 0.d0 + ! do ipoint = 1, n_points_final_grid + ! r(1) = final_grid_points(1,ipoint) + ! r(2) = final_grid_points(2,ipoint) + ! r(3) = final_grid_points(3,ipoint) + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do igauss = 1, n_max_fit_slat + ! delta = expo_gauss_1_erf_x_2(igauss) + ! coef = coef_gauss_1_erf_x_2(igauss) + ! grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) + ! enddo + ! enddo + ! enddo + ! enddo + !endif call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 @@ -398,7 +394,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao tc_grad_square_ao = 0.d0 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & - , 1.d0, tc_grad_square_ao, ao_num*ao_num) + , 0.d0, tc_grad_square_ao, ao_num*ao_num) deallocate(b_mat) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f index e4a3b071..b2772f92 100644 --- a/src/non_h_ints_mu/jast_deriv.irp.f +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -1,7 +1,7 @@ ! --- - BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] + BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] &BEGIN_PROVIDER [ double precision, grad1_u12_squared_num, (n_points_extra_final_grid, n_points_final_grid)] BEGIN_DOC @@ -32,8 +32,15 @@ double precision :: v1b_r1, v1b_r2, u2b_r12 double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz double precision, external :: j12_mu, j1b_nucl + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) do ipoint = 1, n_points_final_grid ! r1 r1(1) = final_grid_points(1,ipoint) @@ -41,7 +48,7 @@ r1(3) = final_grid_points(3,ipoint) v1b_r1 = j1b_nucl(r1) - call grad1_j1b_nuc(r1, grad1_v1b) + call grad1_j1b_nucl(r1, grad1_v1b) do jpoint = 1, n_points_extra_final_grid ! r2 @@ -53,15 +60,19 @@ u2b_r12 = j12_mu(r1, r2) call grad1_j12_mu(r1, r2, grad1_u2b) - grad1_u12_num(jpoint,ipoint,1) = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 - grad1_u12_num(jpoint,ipoint,2) = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 - grad1_u12_num(jpoint,ipoint,3) = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 - grad1_u12_squared_num(jpoint,ipoint) = ( grad1_u12_num(jpoint,ipoint,1) * grad1_u12_num(jpoint,ipoint,1) & - + grad1_u12_num(jpoint,ipoint,2) * grad1_u12_num(jpoint,ipoint,2) & - + grad1_u12_num(jpoint,ipoint,3) * grad1_u12_num(jpoint,ipoint,3) ) + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz enddo enddo + !$OMP END DO + !$OMP END PARALLEL else @@ -170,7 +181,7 @@ end function j1b_nucl ! --- -subroutine grad1_j1b_nuc(r, grad) +subroutine grad1_j1b_nucl(r, grad) implicit none double precision, intent(in) :: r(3) @@ -228,7 +239,7 @@ subroutine grad1_j1b_nuc(r, grad) endif return -end subroutine grad1_j1b_nuc +end subroutine grad1_j1b_nucl ! --- diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 2b22dab4..064eb9f1 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -247,8 +247,7 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, do m = 1, 3 call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & - , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) - + , 0.d0, tc_grad_and_lapl_ao, ao_num*ao_num) enddo deallocate(b_mat) diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f index a92eac02..865522e0 100644 --- a/src/non_h_ints_mu/tc_integ.irp.f +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -50,10 +50,9 @@ ! --- - PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b - if(.not.read_tc_integ) then call wall_time(time0) + PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b int2_grad1_u12_ao = 0.d0 ! TODO OPENMP do ipoint = 1, n_points_final_grid @@ -133,15 +132,38 @@ do m = 1, 3 call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & - , 1.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) enddo + + !! DEBUG + !PROVIDE v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + !int2_grad1_u12_ao = 0.d0 + !do ipoint = 1, n_points_final_grid + ! x = final_grid_points(1,ipoint) + ! y = final_grid_points(2,ipoint) + ! z = final_grid_points(3,ipoint) + ! tmp0 = 0.5d0 * v_1b(ipoint) + ! tmp_x = v_1b_grad(1,ipoint) + ! tmp_y = v_1b_grad(2,ipoint) + ! tmp_z = v_1b_grad(3,ipoint) + ! do j = 1, ao_num + ! do i = 1, ao_num + ! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + ! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + ! int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + ! int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + ! int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + ! enddo + ! enddo + !enddo + ! these dgemm are equivalen to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_num,tmp) + !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp) !!$OMP DO SCHEDULE (static) !do ipoint = 1, n_points_final_grid ! do j = 1, ao_num @@ -165,22 +187,42 @@ call wall_time(time0) int2_grad1_u12_squared_ao = 0.d0 - call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, 1.d0 & + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & - , 1.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) + , 0.d0, int2_grad1_u12_squared_ao(1,1,1), ao_num*ao_num) + + !! DEBUG + !PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + !int2_grad1_u12_squared_ao = 0.d0 + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, ipoint) & + !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! int2_grad1_u12_squared_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + !call wall_time(time1) + !! this dgemm is equivalen to !!$OMP PARALLEL & !!$OMP DEFAULT (NONE) & !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & !!$OMP SHARED (int2_grad1_u12_squared_ao, ao_num, n_points_final_grid, & !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & - !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num,tmp) + !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) !!$OMP DO SCHEDULE (static) !do ipoint = 1, n_points_final_grid ! do j = 1, ao_num ! do i = 1, ao_num ! do jpoint = 1, n_points_extra_final_grid - ! w = tmp(jpoint,i,j) + ! w = 0.5d0 * tmp(jpoint,i,j) ! int2_grad1_u12_squared_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) ! enddo ! enddo @@ -208,9 +250,6 @@ call ezfio_set_tc_keywords_io_tc_integ('Read') endif - call wall_time(time1) - print*, ' wall time for int2_grad1_u12_ao & int2_grad1_u12_squared_ao = ', time1 - time0 - END_PROVIDER ! ---