From 8534b5c104f00f1484c1d2f5b866a75744632042 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Wed, 17 Jan 2024 19:23:24 +0100
Subject: [PATCH] fixed bug for env_type = None

---
 .../ao_many_one_e_ints/grad2_jmu_modif.irp.f  |  58 ++--
 plugins/local/non_h_ints_mu/tc_integ.irp.f    |  76 +----
 .../local/non_h_ints_mu/test_non_h_ints.irp.f | 265 +++++++++++++++++-
 .../local/non_h_ints_mu/total_tc_int.irp.f    |   3 -
 .../local/tc_bi_ortho/slater_tc_slow.irp.f    |   2 +-
 5 files changed, 297 insertions(+), 107 deletions(-)

diff --git a/plugins/local/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/plugins/local/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index b1fc6134..bdcaac9d 100644
--- a/plugins/local/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/plugins/local/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -6,7 +6,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_poin
 
   BEGIN_DOC
   !
-  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) [1 - erf(mu r12)]^2
+  ! \frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) [1 - erf(mu r12)]^2
   !
   END_DOC
 
@@ -45,7 +45,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_poin
           expo_fit = expo_gauss_1_erf_x_2(i_fit)
           coef_fit = coef_gauss_1_erf_x_2(i_fit)
 
-          tmp += -0.25d0 * coef_fit * overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += 0.25d0 * coef_fit * overlap_gauss_r12_ao(r, expo_fit, i, j)
         enddo
 
         int2_grad1u2_grad2u2(j,i,ipoint) = tmp
@@ -96,13 +96,13 @@ BEGIN_PROVIDER [double precision, int2_grad1u2_grad2u2_env2, (ao_num, ao_num, n_
 
   int2_grad1u2_grad2u2_env2 = 0.d0
 
-  !$OMP PARALLEL DEFAULT (NONE)                                      &
-  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
-  !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+  !$OMP PARALLEL DEFAULT (NONE)                                       &
+  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,  &
+  !$OMP          coef_fit, expo_fit, int_fit, tmp)                    & 
   !$OMP SHARED  (n_points_final_grid, ao_num, List_env1s_square_size, & 
-  !$OMP          final_grid_points, ng_fit_jast,                     &
-  !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
-  !$OMP          List_env1s_square_coef, List_env1s_square_expo,       & 
+  !$OMP          final_grid_points, ng_fit_jast,                      &
+  !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,          &
+  !$OMP          List_env1s_square_coef, List_env1s_square_expo,      & 
   !$OMP          List_env1s_square_cent, int2_grad1u2_grad2u2_env2)
   !$OMP DO
   do ipoint = 1, n_points_final_grid
@@ -192,13 +192,13 @@ BEGIN_PROVIDER [double precision, int2_u2_env2, (ao_num, ao_num, n_points_final_
 
   int2_u2_env2 = 0.d0
 
-  !$OMP PARALLEL DEFAULT (NONE)                                      &
-  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
-  !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+  !$OMP PARALLEL DEFAULT (NONE)                                       &
+  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,  &
+  !$OMP          coef_fit, expo_fit, int_fit, tmp)                    & 
   !$OMP SHARED  (n_points_final_grid, ao_num, List_env1s_square_size, & 
-  !$OMP          final_grid_points, ng_fit_jast,                     &
-  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
-  !$OMP          List_env1s_square_coef, List_env1s_square_expo,       & 
+  !$OMP          final_grid_points, ng_fit_jast,                      &
+  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,            &
+  !$OMP          List_env1s_square_coef, List_env1s_square_expo,      & 
   !$OMP          List_env1s_square_cent, int2_u2_env2)
   !$OMP DO
   do ipoint = 1, n_points_final_grid
@@ -287,15 +287,15 @@ BEGIN_PROVIDER [double precision, int2_u_grad1u_x_env2, (ao_num, ao_num, n_point
 
   int2_u_grad1u_x_env2 = 0.d0
 
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z)                                & 
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,  &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,         &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,      & 
+ !$OMP          tmp_x, tmp_y, tmp_z)                                 & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_env1s_square_size, & 
- !$OMP          final_grid_points, ng_fit_jast,                     &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_env1s_square_coef, List_env1s_square_expo,       & 
+ !$OMP          final_grid_points, ng_fit_jast,                      &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,        &
+ !$OMP          List_env1s_square_coef, List_env1s_square_expo,      & 
  !$OMP          List_env1s_square_cent, int2_u_grad1u_x_env2)
  !$OMP DO
 
@@ -409,14 +409,14 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_env2, (ao_num, ao_num, n_points
 
   int2_u_grad1u_env2 = 0.d0
 
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,  &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,    &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)      & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_env1s_square_size, & 
- !$OMP          final_grid_points, ng_fit_jast,                     &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_env1s_square_coef, List_env1s_square_expo,       & 
+ !$OMP          final_grid_points, ng_fit_jast,                      &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,        &
+ !$OMP          List_env1s_square_coef, List_env1s_square_expo,      & 
  !$OMP          List_env1s_square_cent, int2_u_grad1u_env2)
  !$OMP DO
   do ipoint = 1, n_points_final_grid
diff --git a/plugins/local/non_h_ints_mu/tc_integ.irp.f b/plugins/local/non_h_ints_mu/tc_integ.irp.f
index 2255cb5c..775a9e4c 100644
--- a/plugins/local/non_h_ints_mu/tc_integ.irp.f
+++ b/plugins/local/non_h_ints_mu/tc_integ.irp.f
@@ -207,7 +207,7 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p
       do ipoint = 1, n_points_final_grid
         do j = 1, ao_num
           do i = 1, ao_num
-            int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint)
+            int2_grad1_u12_square_ao(i,j,ipoint) = -0.5d0 * int2_grad1u2_grad2u2(i,j,ipoint)
           enddo
         enddo
       enddo
@@ -323,76 +323,6 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p
 
       endif ! use_ipp
 
-!    elseif((j2e_type .eq. "Mu") .and. (env_type .eq. "Sum_Gauss")) then
-!
-!      PROVIDE mu_erf
-!      PROVIDE env_val env_grad
-!      PROVIDE Ir2_Mu_short_Du2_0 Ir2_Mu_short_Du2_x Ir2_Mu_short_Du2_y Ir2_Mu_short_Du2_z Ir2_Mu_short_Du2_2
-!      PROVIDE Ir2_Mu_long_Du2_0 Ir2_Mu_long_Du2_x Ir2_Mu_long_Du2_y Ir2_Mu_long_Du2_z Ir2_Mu_long_Du2_2
-!      PROVIDE Ir2_Mu_gauss_Du2
-!
-!      tmp_ct  = 1.d0 / (dsqrt(dacos(-1.d0)) * mu_erf)
-!      tmp_ct2 = tmp_ct * tmp_ct
-!
-!      int2_grad1_u12_square_ao = 0.d0
-!
-!      !$OMP PARALLEL                                                &
-!      !$OMP DEFAULT (NONE)                                          &
-!      !$OMP PRIVATE (ipoint, i, j, x, y, z, r2, dx, dy, dz, dr2,    &
-!      !$OMP         tmp1, tmp2, tmp3, tmp4, tmp5, tmp6,             & 
-!      !$OMP         tmp0_x, tmp0_y, tmp0_z, tmp1_x, tmp1_y, tmp1_z) &
-!      !$OMP SHARED (ao_num, n_points_final_grid, final_grid_points, &
-!      !$OMP         tmp_ct, tmp_ct2, env_val, env_grad,             &
-!      !$OMP         Ir2_Mu_long_Du2_0, Ir2_Mu_long_Du2_x,     &
-!      !$OMP         Ir2_Mu_long_Du2_y, Ir2_Mu_long_Du2_z,     &
-!      !$OMP         Ir2_Mu_gauss_Du2, Ir2_Mu_long_Du2_2,      &
-!      !$OMP         Ir2_Mu_short_Du2_0, Ir2_Mu_short_Du2_x,   &
-!      !$OMP         Ir2_Mu_short_Du2_y, Ir2_Mu_short_Du2_z,   &
-!      !$OMP         Ir2_Mu_short_Du2_2, int2_grad1_u12_square_ao)
-!      !$OMP DO SCHEDULE (static)
-!      do ipoint = 1, n_points_final_grid
-!
-!        x  = final_grid_points(1,ipoint)
-!        y  = final_grid_points(2,ipoint)
-!        z  = final_grid_points(3,ipoint)
-!        r2 = x*x + y*y + z*z
-!
-!        dx = env_grad(1,ipoint)
-!        dy = env_grad(2,ipoint)
-!        dz = env_grad(3,ipoint)
-!        dr2 = dx*dx + dy*dy + dz*dz
-!
-!        tmp0_x = 0.5d0 * (dr2 * x + env_val(ipoint) * dx)
-!        tmp0_y = 0.5d0 * (dr2 * y + env_val(ipoint) * dy)
-!        tmp0_z = 0.5d0 * (dr2 * z + env_val(ipoint) * dz)
-!
-!        tmp1 = 0.25d0 * (env_val(ipoint)*env_val(ipoint) + r2*dr2 + 2.d0*env_val(ipoint)*(x*dx+y*dy+z*dz))
-!        tmp3 = 0.25d0 * dr2 
-!        tmp4 = tmp3 * tmp_ct2
-!        tmp5 = 0.50d0 * tmp_ct * (r2*dr2 + env_val(ipoint)*(x*dx+y*dy+z*dz))
-!        tmp6 = 0.50d0 * tmp_ct * dr2
-!
-!        tmp1_x = 0.5d0 * tmp_ct * (2.d0*dr2*x + env_val(ipoint)*dx)
-!        tmp1_y = 0.5d0 * tmp_ct * (2.d0*dr2*y + env_val(ipoint)*dy)
-!        tmp1_z = 0.5d0 * tmp_ct * (2.d0*dr2*z + env_val(ipoint)*dz)
-!
-!        do j = 1, ao_num
-!          do i = 1, ao_num
-!
-!            tmp2 = tmp1_x * Ir2_Mu_long_Du2_x (i,j,ipoint) + tmp1_y * Ir2_Mu_long_Du2_y (i,j,ipoint) + tmp1_z * Ir2_Mu_long_Du2_z (i,j,ipoint) &
-!                 - tmp0_x * Ir2_Mu_short_Du2_x(i,j,ipoint) - tmp0_y * Ir2_Mu_short_Du2_y(i,j,ipoint) - tmp0_z * Ir2_Mu_short_Du2_z(i,j,ipoint)
-!
-!            int2_grad1_u12_square_ao(i,j,ipoint) = tmp1 * Ir2_Mu_short_Du2_0(i,j,ipoint) + tmp2 + tmp3 * Ir2_Mu_short_Du2_2(i,j,ipoint) &
-!                                                 + tmp4 * Ir2_Mu_gauss_Du2(i,j,ipoint) - tmp5 * Ir2_Mu_long_Du2_0(i,j,ipoint)           &
-!                                                 - tmp6 * Ir2_Mu_long_Du2_2(i,j,ipoint)
-!          enddo
-!        enddo
-!      enddo
-!      !$OMP END DO
-!      !$OMP END PARALLEL
-!
-!      int2_grad1_u12_square_ao = -0.5d0 * int2_grad1_u12_square_ao
-
     else 
 
       print *, ' Error in int2_grad1_u12_square_ao: Unknown Jastrow'
@@ -409,8 +339,8 @@ BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_p
       PROVIDE j1e_gradx j1e_grady j1e_gradz
       PROVIDE int2_grad1_u2e_ao
 
-      tmp_ct1 = 2.d0 / (dble(elec_num) - 1.d0)
-      tmp_ct2 = 1.d0 / ((dble(elec_num) - 1.d0) * (dble(elec_num) - 1.d0))
+      tmp_ct1 = -1.0d0 / (dble(elec_num) - 1.d0)
+      tmp_ct2 = -0.5d0 / ((dble(elec_num) - 1.d0) * (dble(elec_num) - 1.d0))
 
       !$OMP PARALLEL                                 &
       !$OMP DEFAULT (NONE)                           &
diff --git a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f
index 3f88c53f..90e5a7b3 100644
--- a/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f
+++ b/plugins/local/non_h_ints_mu/test_non_h_ints.irp.f
@@ -37,7 +37,10 @@ program test_non_h
 
   !call test_j1e_grad()
 
-  call test_j1e_fit_ao()
+  !call test_j1e_fit_ao()
+
+  call test_tc_grad_and_lapl_ao_new()
+  call test_tc_grad_square_ao_new()
 end
 
 ! ---
@@ -849,3 +852,263 @@ end
 
 ! ---
 
+subroutine test_tc_grad_and_lapl_ao_new()
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: i_old, i_new, diff, thr, accu, norm
+  double precision, allocatable :: tc_grad_and_lapl_ao_old(:,:,:,:)
+
+  PROVIDE tc_grad_and_lapl_ao_new
+
+  thr  = 1d-10
+  norm = 0.d0
+  accu = 0.d0
+
+  allocate(tc_grad_and_lapl_ao_old(ao_num,ao_num,ao_num,ao_num))
+
+  open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao_old', action="read")
+    read(11) tc_grad_and_lapl_ao_old
+  close(11)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          i_old = tc_grad_and_lapl_ao_old(l,k,j,i)
+          i_new = tc_grad_and_lapl_ao_new(l,k,j,i)
+          diff  = dabs(i_old - i_new)
+          if(diff .gt. thr) then
+            print *, ' problem in tc_grad_and_lapl_ao_new on:', l, k, j, i
+            print *, ' old :', i_old
+            print *, ' new :', i_new
+            stop
+          endif
+          accu += diff
+          norm += dabs(i_old) 
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(tc_grad_and_lapl_ao_old)
+
+  print*, ' accuracy (%) = ', 100.d0 * accu / norm
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao_new()
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: i_old, i_new, diff, thr, accu, norm
+  double precision, allocatable :: tc_grad_square_ao_old(:,:,:,:)
+
+  PROVIDE tc_grad_square_ao_new
+
+  thr  = 1d-10
+  norm = 0.d0
+  accu = 0.d0
+
+  allocate(tc_grad_square_ao_old(ao_num,ao_num,ao_num,ao_num))
+
+  open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_square_ao_old', action="read")
+    read(11) tc_grad_square_ao_old
+  close(11)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          i_old = tc_grad_square_ao_old(l,k,j,i)
+          i_new = tc_grad_square_ao_new(l,k,j,i)
+          diff  = dabs(i_old - i_new)
+          if(diff .gt. thr) then
+            print *, ' problem in tc_grad_and_lapl_ao_new on:', l, k, j, i
+            print *, ' old :', i_old
+            print *, ' new :', i_new
+            stop
+          endif
+          accu += diff
+          norm += dabs(i_old) 
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(tc_grad_square_ao_old)
+
+  print*, ' accuracy (%) = ', 100.d0 * accu / norm
+
+end
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_new, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer                       :: i, j, k, l, m, ipoint
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
+  double precision              :: time0, time1
+  double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:)
+  double precision, external    :: get_ao_two_e_integral
+
+  PROVIDe tc_integ_type
+  PROVIDE env_type
+  PROVIDE j2e_type
+  PROVIDE j1e_type
+
+  call wall_time(time0)
+
+  print *, ' providing tc_grad_square_ao_new ...'
+
+  PROVIDE int2_grad1_u12_square_ao
+
+  allocate(c_mat(n_points_final_grid,ao_num,ao_num))
+
+  !$OMP PARALLEL               &
+  !$OMP DEFAULT (NONE)         &
+  !$OMP PRIVATE (i, k, ipoint) &
+  !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+  !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        c_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                 &
+            , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
+            , 0.d0, tc_grad_square_ao_new, ao_num*ao_num)
+
+  FREE int2_grad1_u12_square_ao
+
+  if( (tc_integ_type .eq. "semi-analytic")                            .and. &
+      (j2e_type .eq. "Mu")                                            .and. &
+      ((env_type .eq. "Prod_Gauss") .or. (env_type .eq. "Sum_Gauss")) .and. &
+      use_ipp ) then
+
+    ! an additional term is added here directly instead of 
+    ! being added in int2_grad1_u12_square_ao for performance
+
+    PROVIDE int2_u2_env2
+
+    !$OMP PARALLEL                                                                                     &
+    !$OMP DEFAULT (NONE)                                                                               &
+    !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                                              &
+    !$OMP SHARED (aos_in_r_array_transp, c_mat, ao_num, n_points_final_grid, final_weight_at_r_vector, &
+    !$OMP         env_square_grad, env_square_lapl, aos_grad_in_r_array_transp_bis)
+    !$OMP DO SCHEDULE (static)
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+
+          weight1 = 0.25d0 * final_weight_at_r_vector(ipoint)
+
+          ao_i_r = aos_in_r_array_transp(ipoint,i)
+          ao_k_r = aos_in_r_array_transp(ipoint,k)
+
+          c_mat(ipoint,k,i) = weight1 * ( ao_k_r * ao_i_r * env_square_lapl(ipoint)                                                                                   &
+                            + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * env_square_grad(ipoint,1) &
+                            + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * env_square_grad(ipoint,2) &
+                            + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * env_square_grad(ipoint,3) )
+        enddo
+      enddo
+    enddo
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0     &
+              , int2_u2_env2(1,1,1), ao_num*ao_num, c_mat(1,1,1), n_points_final_grid &
+              , 1.d0, tc_grad_square_ao_new, ao_num*ao_num)
+
+    FREE int2_u2_env2
+  endif ! use_ipp
+
+  deallocate(c_mat)
+
+  call sum_A_At(tc_grad_square_ao_new(1,1,1,1), ao_num*ao_num)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_new (min) = ', (time1 - time0) / 60.d0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_new, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer                       :: i, j, k, l, m, ipoint
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq
+  double precision              :: time0, time1
+  double precision, allocatable :: b_mat(:,:,:,:), c_mat(:,:,:)
+  double precision, external    :: get_ao_two_e_integral
+
+  PROVIDe tc_integ_type
+  PROVIDE env_type
+  PROVIDE j2e_type
+  PROVIDE j1e_type
+
+  call wall_time(time0)
+
+  print *, ' providing tc_grad_square_ao_new ...'
+
+
+  PROVIDE int2_grad1_u12_ao
+
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3))
+
+  !$OMP PARALLEL                                                              &
+  !$OMP DEFAULT (NONE)                                                        &
+  !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+  !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+  !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+  !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  tc_grad_and_lapl_ao_new = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0             &
+              , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, tc_grad_and_lapl_ao_new, ao_num*ao_num)
+    enddo
+  deallocate(b_mat)
+
+  FREE int2_grad1_u12_ao
+  FREE int2_grad1_u2e_ao
+
+  call sum_A_At(tc_grad_and_lapl_ao_new(1,1,1,1), ao_num*ao_num)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_new (min) = ', (time1 - time0) / 60.d0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/plugins/local/non_h_ints_mu/total_tc_int.irp.f b/plugins/local/non_h_ints_mu/total_tc_int.irp.f
index 4cedf0e6..38da4047 100644
--- a/plugins/local/non_h_ints_mu/total_tc_int.irp.f
+++ b/plugins/local/non_h_ints_mu/total_tc_int.irp.f
@@ -67,7 +67,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
     allocate(c_mat(n_points_final_grid,ao_num,ao_num))
 
-    c_mat = 0.d0
     !$OMP PARALLEL               &
     !$OMP DEFAULT (NONE)         &
     !$OMP PRIVATE (i, k, ipoint) &
@@ -99,7 +98,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
       PROVIDE int2_u2_env2
 
-      c_mat = 0.d0
       !$OMP PARALLEL                                                                                     &
       !$OMP DEFAULT (NONE)                                                                               &
       !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                                              &
@@ -142,7 +140,6 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n
 
       allocate(b_mat(n_points_final_grid,ao_num,ao_num,3))
 
-      b_mat = 0.d0
       !$OMP PARALLEL                                                              &
       !$OMP DEFAULT (NONE)                                                        &
       !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
diff --git a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f b/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f
index 02352a32..caf7d665 100644
--- a/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f
+++ b/plugins/local/tc_bi_ortho/slater_tc_slow.irp.f
@@ -27,7 +27,7 @@ subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot)
     call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
   endif
 
-end subroutine htilde_mu_mat_bi_ortho_tot_slow
+end
 
 ! --