From 0b7c5fcb977592c6807eb80fe2cddcdd852e40db Mon Sep 17 00:00:00 2001 From: eginer Date: Wed, 14 Dec 2022 18:47:32 +0100 Subject: [PATCH] trying to optimize the most intensive part --- src/ao_many_one_e_ints/grad2_jmu_manu.irp.f | 31 ++++--- .../grad_lapl_jmu_manu.irp.f | 21 +++-- src/ao_tc_eff_map/fit_j.irp.f | 10 +++ src/dft_utils_in_r/ao_prod_mlti_pl.irp.f | 83 ++++++++++++++----- src/non_h_ints_mu/grad_squared_manu.irp.f | 3 +- src/tc_scf/test_int.irp.f | 49 ++++++++++- 6 files changed, 151 insertions(+), 46 deletions(-) diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f index 6f7b29d2..d5210aa7 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f @@ -17,8 +17,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s double precision :: int_gauss,dsqpi_3_2,int_j1b - double precision :: factor_ij_1s,beta_ij,center_ij_1s(3) - dsqpi_3_2 = (dacos(-1.d0))**(3/2) + double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 + sq_pi_3_2 = (dacos(-1.d0))**(3/2) provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef call wall_time(wall0) @@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & !$OMP List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,& - !$OMP ao_overlap_abs,dsqpi_3_2) + !$OMP ao_overlap_abs,sq_pi_3_2) !$OMP DO SCHEDULE(dynamic) do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -60,7 +60,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle +! if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version + if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle ! call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & ! expo_fit, i, j, int_fit_v, n_points_final_grid) @@ -200,7 +201,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s - double precision :: factor_ij_1s,beta_ij,center_ij_1s(3) + double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 + sq_pi_3_2 = (dacos(-1.d0))**(3/2) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -213,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b3_size, & !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & + !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2, & !$OMP List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b) !$OMP DO do ipoint = 1, n_points_final_grid @@ -242,7 +244,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_ coef_fit = coef_gauss_j_mu_x_2(i_fit) !DIR$ FORCEINLINE call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) - if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle +! if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version + if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle ! --- @@ -291,8 +294,8 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num double precision :: coef, beta, B_center(3), dist double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp double precision :: tmp_x, tmp_y, tmp_z, int_j1b - double precision :: wall0, wall1 - + double precision :: wall0, wall1, sq_pi_3_2,sq_alpha + sq_pi_3_2 = dacos(-1.D0)**(3/2) provide mu_erf final_grid_points j1b_pen call wall_time(wall0) @@ -302,12 +305,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z,int_j1b) & + !$OMP tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha) & !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b3_size, & !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & !$OMP List_comb_thr_b3_coef, List_comb_thr_b3_expo, & - !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b) + !$OMP List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -348,7 +351,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle + sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv) +! if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version + if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) @@ -450,7 +455,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p expo_fit = expo_gauss_j_mu_1_erf(i_fit) call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle + if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle coef_fit = coef_gauss_j_mu_1_erf(i_fit) alpha_1s = beta + expo_fit diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f index 13ca41f2..f71a66e6 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f @@ -50,7 +50,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i) B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) - + ! TODO :: cycle on the 1 - erf(mur12) int_mu = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) @@ -122,6 +122,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s dsqpi_3_2 = (dacos(-1.d0))**(3/2) + provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center call wall_time(wall0) x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0 @@ -132,9 +133,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num !$OMP SHARED (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,& !$OMP List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent, & !$OMP x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b, & - !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2) + !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma) +! !$OMP ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss) !$OMP DO - !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -142,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num do i = 1, ao_num do j = i, ao_num - if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle + if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle tmp_x = 0.d0 tmp_y = 0.d0 @@ -157,10 +158,14 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i) B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i) - ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2) -! !DIR$ FORCEINLINE -! call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s) -! if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle +! if(ao_prod_center(1,j,i).ne.10000.d0)then +! ! approximate 1 - erf(mu r12) by a gaussian * 10 +! !DIR$ FORCEINLINE +! call gaussian_product(expo_erfc_mu_gauss,r, & +! ao_prod_sigma(j,i),ao_prod_center(1,j,i), & +! factor_ij_1s,beta_ij,center_ij_1s) +! if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-3/2)).lt.1.d-10)cycle +! endif call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f index d861054e..902d4514 100644 --- a/src/ao_tc_eff_map/fit_j.irp.f +++ b/src/ao_tc_eff_map/fit_j.irp.f @@ -13,6 +13,16 @@ END_PROVIDER ! --- +BEGIN_PROVIDER [ double precision, expo_erfc_gauss ] + implicit none + expo_erfc_gauss = 1.41211d0 +END_PROVIDER + +BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ] + implicit none + expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf +END_PROVIDER + BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ] &BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ] implicit none diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f index 9393668f..39ea0cdf 100644 --- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f +++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f @@ -1,5 +1,28 @@ + +BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)] + implicit none + BEGIN_DOC +! ao_abs_int_grid(i) = \int dr |phi_i(r) | + END_DOC + integer :: i,j,ipoint + double precision :: contrib, weight,r(3) + ao_abs_int_grid = 0.D0 + do ipoint = 1,n_points_final_grid + r(:) = final_grid_points(:,ipoint) + weight = final_weight_at_r_vector(ipoint) + do i = 1, ao_num + contrib = dabs(aos_in_r_array(i,ipoint)) * weight + ao_abs_int_grid(i) += contrib + enddo + enddo + +END_PROVIDER + BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)] implicit none + BEGIN_DOC +! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| + END_DOC integer :: i,j,ipoint double precision :: contrib, weight,r(3) ao_overlap_abs_grid = 0.D0 @@ -21,7 +44,7 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)] BEGIN_DOC ! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)| ! -! if \int |phi_i(r) phi_j(r)| < 1.d-15 then ao_prod_center = 0. +! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000. END_DOC integer :: i,j,m,ipoint double precision :: contrib, weight,r(3) @@ -38,26 +61,29 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)] enddo enddo enddo -! do i = 1, ao_num -! do j = 1, ao_num -! if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then -! do m = 1, 3 -! ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i) -! enddo -! endif -! enddo -! enddo + do i = 1, ao_num + do j = 1, ao_num + if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then + do m = 1, 3 + ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i) + enddo + else + do m = 1, 3 + ao_prod_center(m,j,i) = 10000.d0 + enddo + endif + enddo + enddo END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)] +BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)] implicit none BEGIN_DOC -! ao_prod_sigma(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)| +! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)| ! -! gives you a precise idea of the spatial extension of the distribution phi_i(r) phi_j(r) END_DOC - ao_prod_sigma = 0.d0 + ao_prod_abs_r = 0.d0 integer :: i,j,m,ipoint double precision :: contrib, weight,r(3),contrib_x2 do ipoint = 1,n_points_final_grid @@ -71,21 +97,34 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)] contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) enddo contrib_x2 = dsqrt(contrib_x2) - ao_prod_sigma(j,i) += contrib * contrib_x2 + ao_prod_abs_r(j,i) += contrib * contrib_x2 enddo enddo enddo -! do i = 1, ao_num -! do j = 1, ao_num -! if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then -! ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i) -! endif -! enddo -! enddo END_PROVIDER + BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)] + implicit none + BEGIN_DOC +! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| +! +! Therefore |chi_i(r) chi_j(r)| \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2} + END_DOC + integer :: i,j + double precision :: pi,alpha + pi = dacos(-1.d0) + do i = 1, ao_num + do j = 1, ao_num +! if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then + alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2 + ao_prod_sigma(j,i) = alpha +! endif + enddo + enddo + END_PROVIDER + BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)] implicit none BEGIN_DOC diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f index e1fc4d86..14749082 100644 --- a/src/non_h_ints_mu/grad_squared_manu.irp.f +++ b/src/non_h_ints_mu/grad_squared_manu.irp.f @@ -99,6 +99,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao double precision :: time0, time1 double precision, external :: overlap_gauss_r12_ao + provide int2_u_grad1u_x_j1b2_test print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...' call wall_time(time0) @@ -147,7 +148,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi double precision :: tmp1 double precision :: time0, time1 double precision, external :: overlap_gauss_r12_ao - + provide int2_grad1u2_grad2u2_j1b2_test print*, ' providing grad12_j12_test ...' call wall_time(time0) diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f index d0217423..a81b09d5 100644 --- a/src/tc_scf/test_int.irp.f +++ b/src/tc_scf/test_int.irp.f @@ -41,14 +41,21 @@ program test_ints ! call test_total_grad_square ! call test_ao_tc_int_chemist ! call test_grid_points_ao - call test_tc_scf +! call test_tc_scf + call test_int_gauss end subroutine test_tc_scf implicit none + integer :: i +! provide int2_u_grad1u_x_j1b2_test + provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test +! do i = 1, ng_fit_jast +! print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i) +! enddo ! provide tc_grad_square_ao_test - provide tc_grad_and_lapl_ao_test +! provide tc_grad_and_lapl_ao_test ! provide int2_u_grad1u_x_j1b2_test ! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test ! print*,'TC_HF_energy = ',TC_HF_energy @@ -657,3 +664,41 @@ subroutine test_grid_points_ao enddo enddo end + +subroutine test_int_gauss + implicit none + integer :: i,j + print*,'center' + do i = 1, ao_num + do j = i, ao_num + print*,j,i + print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i) + print*,ao_prod_center(1:3,j,i) + enddo + enddo + print*,'' + double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2 + center = 0.d0 + pi = dacos(-1.d0) + integral_1 = 0.d0 + integral_2 = 0.d0 + alpha = 0.75d0 + do i = 1, n_points_final_grid + ! you get x, y and z of the ith grid point + r(1) = final_grid_points(1,i) + r(2) = final_grid_points(2,i) + r(3) = final_grid_points(3,i) + weight = final_weight_at_r_vector(i) + distance = dsqrt( (r(1) - center(1))**2 + (r(2) - center(2))**2 + (r(3) - center(3))**2 ) + f_r = dexp(-alpha * distance*distance) + ! you add the contribution of the grid point to the integral + integral_1 += f_r * weight + integral_2 += f_r * distance * weight + enddo + print*,'integral_1 =',integral_1 + print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5 + print*,'integral_2 =',integral_2 + print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2 + + +end