trying to optimize the most intensive part

2025-02-18 22:54:03 +01:00 · 2022-12-14 18:47:32 +01:00 · 2022-12-14 18:47:32 +01:00 · 0b7c5fcb97
commit 0b7c5fcb97
parent a34653b5d1
6 changed files with 151 additions and 46 deletions
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@ -17,8 +17,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
  double precision, allocatable :: int_fit_v(:)
  double precision, external    :: overlap_gauss_r12_ao_with1s
  double precision :: int_gauss,dsqpi_3_2,int_j1b
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)

  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
  call wall_time(wall0)
@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
-     !$OMP          ao_overlap_abs,dsqpi_3_2)
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
 !$OMP DO SCHEDULE(dynamic)
 do ipoint = 1, n_points_final_grid
   r(1) = final_grid_points(1,ipoint)
@ -60,7 +60,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
           !DIR$ FORCEINLINE
           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
-           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
  
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
@ -200,7 +201,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_

  double precision, external    :: overlap_gauss_r12_ao
  double precision, external    :: overlap_gauss_r12_ao_with1s
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)

  provide mu_erf final_grid_points j1b_pen
  call wall_time(wall0)
@ -213,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
 !$OMP          final_grid_points, ng_fit_jast,                     &
 !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
 !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
 !$OMP DO
  do ipoint = 1, n_points_final_grid
@ -242,7 +244,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
            coef_fit = coef_gauss_j_mu_x_2(i_fit)
            !DIR$ FORCEINLINE
            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
          
            ! ---
          
@ -291,8 +294,8 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
  double precision :: coef, beta, B_center(3), dist
  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
-  double precision :: wall0, wall1
-
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+  sq_pi_3_2 = dacos(-1.D0)**(3/2)
  provide mu_erf final_grid_points j1b_pen
  call wall_time(wall0)

@ -302,12 +305,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
 !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
 !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z,int_j1b)                        & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
 !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
 !$OMP          final_grid_points, ng_fit_jast,                     &
 !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
 !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
 !$OMP DO

  do ipoint = 1, n_points_final_grid
@ -348,7 +351,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num

            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
            
            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)

@ -450,7 +455,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p

            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
            coef_fit = coef_gauss_j_mu_1_erf(i_fit)

            alpha_1s     = beta + expo_fit
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@ -50,7 +50,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
-
+          ! TODO :: cycle on the 1 - erf(mur12)
          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)

@ -122,6 +122,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
  dsqpi_3_2 = (dacos(-1.d0))**(3/2)

+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
  call wall_time(wall0)

  x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0
@ -132,9 +133,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
 !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
 !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
- !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
 !$OMP DO
-  !do ipoint = 1, 10
  do ipoint = 1, n_points_final_grid
    r(1) = final_grid_points(1,ipoint)
    r(2) = final_grid_points(2,ipoint)
@ -142,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num

    do i = 1, ao_num
      do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle

        tmp_x = 0.d0
        tmp_y = 0.d0
@ -157,10 +158,14 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)

-          ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2)
-!          !DIR$ FORCEINLINE
-!          call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!          if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle 
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-3/2)).lt.1.d-10)cycle 
+!          endif
          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)

--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@ -13,6 +13,16 @@
 END_PROVIDER 
 ! ---

+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
 &BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
 implicit none
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@ -1,5 +1,28 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
 implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
 integer :: i,j,ipoint
 double precision :: contrib, weight,r(3)
 ao_overlap_abs_grid = 0.D0
@ -21,7 +44,7 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
 BEGIN_DOC
 ! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
 !
-! if \int |phi_i(r) phi_j(r)| < 1.d-15 then ao_prod_center = 0.
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
 END_DOC
 integer :: i,j,m,ipoint
 double precision :: contrib, weight,r(3)
@ -38,26 +61,29 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
   enddo
  enddo
 enddo
-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!    do m = 1, 3
-!     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!    enddo
-!   endif
-!  enddo
-! enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo

 END_PROVIDER 

-BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
 implicit none
 BEGIN_DOC
-! ao_prod_sigma(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
 !
-! gives you a precise idea of the spatial extension of the distribution phi_i(r) phi_j(r)
 END_DOC
- ao_prod_sigma = 0.d0
+ ao_prod_abs_r = 0.d0
 integer :: i,j,m,ipoint
 double precision :: contrib, weight,r(3),contrib_x2
 do ipoint = 1,n_points_final_grid 
@ -71,21 +97,34 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
    enddo
    contrib_x2 = dsqrt(contrib_x2)
-    ao_prod_sigma(j,i) += contrib * contrib_x2
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
   enddo
  enddo
 enddo

-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!   endif
-!  enddo
-! enddo

 END_PROVIDER 

+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
 implicit none
 BEGIN_DOC
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@ -99,6 +99,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao
  double precision           :: time0, time1
  double precision, external :: overlap_gauss_r12_ao

+  provide int2_u_grad1u_x_j1b2_test
  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
  call wall_time(time0)

@ -147,7 +148,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi
  double precision           :: tmp1
  double precision           :: time0, time1
  double precision, external :: overlap_gauss_r12_ao
-
+  provide int2_grad1u2_grad2u2_j1b2_test
  print*, ' providing grad12_j12_test ...'
  call wall_time(time0)

--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@ -41,14 +41,21 @@ program test_ints
 ! call test_total_grad_square
 ! call test_ao_tc_int_chemist
 ! call test_grid_points_ao
- call test_tc_scf
+! call test_tc_scf
+ call test_int_gauss

 end

 subroutine test_tc_scf
 implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
 ! provide tc_grad_square_ao_test
-  provide tc_grad_and_lapl_ao_test
+!  provide tc_grad_and_lapl_ao_test
 ! provide int2_u_grad1u_x_j1b2_test
 ! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
 ! print*,'TC_HF_energy = ',TC_HF_energy
@ -657,3 +664,41 @@ subroutine test_grid_points_ao
  enddo
 enddo
 end
+
+subroutine test_int_gauss
+ implicit none
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2
+
+
+end