diff --git a/plugins/local/jastrow/EZFIO.cfg b/plugins/local/jastrow/EZFIO.cfg
index a1e0a871..0d4141af 100644
--- a/plugins/local/jastrow/EZFIO.cfg
+++ b/plugins/local/jastrow/EZFIO.cfg
@@ -13,7 +13,7 @@ default: None
 
 [env_type]
 type: character*(32)
-doc: type of 1-body Jastrow: [ None | Prod_Gauss | Sum_Gauss | Sum_Slat | Sum_Quartic ]
+doc: type of envelop for Jastrow: [ None | Prod_Gauss | Sum_Gauss | Sum_Slat | Sum_Quartic ]
 interface: ezfio, provider, ocaml
 default: Sum_Gauss
 
@@ -91,10 +91,22 @@ size: (jastrow.j1e_size,nuclei.nucl_num)
 
 [j1e_coef_ao]
 type: double precision
-doc: coefficients of the 1-body Jastrow in AOs
+doc: coefficients of the 1-electrob Jastrow in AOs
 interface: ezfio
 size: (ao_basis.ao_num)
 
+[j1e_coef_ao2]
+type: double precision
+doc: coefficients of the 1-electron Jastrow in AOsxAOs
+interface: ezfio
+size: (ao_basis.ao_num*ao_basis.ao_num)
+
+[j1e_coef_ao3]
+type: double precision
+doc: coefficients of the 1-electron Jastrow in AOsxAOs
+interface: ezfio
+size: (ao_basis.ao_num,3)
+
 [j1e_expo]
 type: double precision
 doc: exponenets of functions in 1e-Jastrow
@@ -103,13 +115,13 @@ size: (jastrow.j1e_size,nuclei.nucl_num)
 
 [env_expo]
 type: double precision
-doc: exponents of the 1-body Jastrow
+doc: exponents of the envelop for Jastrow
 interface: ezfio
 size: (nuclei.nucl_num)
 
 [env_coef]
 type: double precision
-doc: coefficients of the 1-body Jastrow
+doc: coefficients of the envelop for Jastrow
 interface: ezfio
 size: (nuclei.nucl_num)
 
@@ -125,4 +137,10 @@ doc: nb of Gaussians used to fit Jastrow fcts
 interface: ezfio,provider,ocaml
 default: 20
 
+[a_boys]
+type: double precision
+doc: cutting of the interaction in the range separated model
+interface: ezfio,provider,ocaml
+default: 1.0
+ezfio_name: a_boys
 
diff --git a/plugins/local/non_h_ints_mu/debug_fit.irp.f b/plugins/local/non_h_ints_mu/debug_fit.irp.f
index 3934bb06..d4b917ec 100644
--- a/plugins/local/non_h_ints_mu/debug_fit.irp.f
+++ b/plugins/local/non_h_ints_mu/debug_fit.irp.f
@@ -401,10 +401,10 @@ subroutine test_grad1_u12_withsq_num()
 
   do ipoint = 1, n_points_final_grid
 
-    call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,ipoint,1) &
-                                                                                           , tmp_grad1_u12(1,ipoint,2) &
-                                                                                           , tmp_grad1_u12(1,ipoint,3) &
-                                                                                           , tmp_grad1_u12_squared(1,ipoint))
+    call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,ipoint,1) &
+                                                                      , tmp_grad1_u12(1,ipoint,2) &
+                                                                      , tmp_grad1_u12(1,ipoint,3) &
+                                                                      , tmp_grad1_u12_squared(1,ipoint))
     do jpoint = 1, n_points_extra_final_grid
 
       i_exc  = grad1_u12_squared_num(jpoint,ipoint) 
diff --git a/plugins/local/non_h_ints_mu/jast_1e.irp.f b/plugins/local/non_h_ints_mu/jast_1e.irp.f
index 47245938..37ac0092 100644
--- a/plugins/local/non_h_ints_mu/jast_1e.irp.f
+++ b/plugins/local/non_h_ints_mu/jast_1e.irp.f
@@ -70,14 +70,15 @@ END_PROVIDER
 &BEGIN_PROVIDER [double precision, j1e_gradz, (n_points_final_grid)]
 
   implicit none
-  integer                       :: ipoint, i, j, p
+  integer                       :: ipoint, i, j, ij, p
   integer                       :: ierr
   logical                       :: exists
   double precision              :: x, y, z, dx, dy, dz, d2
   double precision              :: a, c, g, tmp_x, tmp_y, tmp_z
+  double precision              :: cx, cy, cz
   double precision              :: time0, time1
   double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
-  double precision, allocatable :: coef_fit(:)
+  double precision, allocatable :: coef_fit(:), coef_fit2(:), coef_fit3(:,:)
 
   PROVIDE j1e_type
 
@@ -162,21 +163,164 @@ END_PROVIDER
 
     deallocate(Pa, Pb, Pt)
 
+!  elseif(j1e_type .eq. "Charge_Harmonizer_AO") then
+!
+!    ! \grad_1 \sum_{\eta} C_{\eta} \chi_{\eta}
+!    ! where 
+!    !       \chi_{\eta} are the AOs
+!    !       C_{\eta} are fitted to mimic (j1e_type .eq. "Charge_Harmonizer")
+!    !
+!    ! The - sign is in the parameters C_{\eta}
+!
+!    PROVIDE aos_grad_in_r_array
+!
+!    allocate(coef_fit(ao_num))
+!
+!    if(mpi_master) then
+!      call ezfio_has_jastrow_j1e_coef_ao(exists)
+!    endif 
+!    IRP_IF MPI_DEBUG
+!      print *,  irp_here, mpi_rank
+!      call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+!    IRP_ENDIF
+!    IRP_IF MPI
+!      include 'mpif.h'
+!      call MPI_BCAST(coef_fit, ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!      if (ierr /= MPI_SUCCESS) then
+!        stop 'Unable to read j1e_coef_ao with MPI'
+!      endif
+!    IRP_ENDIF
+!    if(exists) then
+!      if(mpi_master) then
+!        write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao ] <<<<< ..'
+!        call ezfio_get_jastrow_j1e_coef_ao(coef_fit)
+!        IRP_IF MPI
+!          call MPI_BCAST(coef_fit, ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!          if (ierr /= MPI_SUCCESS) then
+!            stop 'Unable to read j1e_coef_ao with MPI'
+!          endif
+!        IRP_ENDIF
+!      endif
+!    else
+!
+!      call get_j1e_coef_fit_ao(ao_num, coef_fit)
+!      call ezfio_set_jastrow_j1e_coef_ao(coef_fit)
+!
+!    endif
+!
+!    !$OMP PARALLEL                               &
+!    !$OMP DEFAULT (NONE)                         &
+!    !$OMP PRIVATE (i, ipoint, c)                 &
+!    !$OMP SHARED (n_points_final_grid, ao_num,   &
+!    !$OMP         aos_grad_in_r_array, coef_fit, &
+!    !$OMP         j1e_gradx, j1e_grady, j1e_gradz)
+!    !$OMP DO SCHEDULE (static)
+!    do ipoint = 1, n_points_final_grid
+!
+!      j1e_gradx(ipoint) = 0.d0
+!      j1e_grady(ipoint) = 0.d0
+!      j1e_gradz(ipoint) = 0.d0
+!      do i = 1, ao_num
+!        c = coef_fit(i)
+!        j1e_gradx(ipoint) = j1e_gradx(ipoint) + c * aos_grad_in_r_array(i,ipoint,1)
+!        j1e_grady(ipoint) = j1e_grady(ipoint) + c * aos_grad_in_r_array(i,ipoint,2)
+!        j1e_gradz(ipoint) = j1e_gradz(ipoint) + c * aos_grad_in_r_array(i,ipoint,3)
+!      enddo
+!    enddo
+!    !$OMP END DO
+!    !$OMP END PARALLEL
+!
+!    deallocate(coef_fit)
+!
+!  elseif(j1e_type .eq. "Charge_Harmonizer_AO2") then
+!
+!    ! \grad_1 \sum_{\eta,\beta} C_{\eta,\beta} \chi_{\eta} \chi_{\beta}
+!    ! where 
+!    !       \chi_{\eta} are the AOs
+!    !       C_{\eta,\beta} are fitted to mimic (j1e_type .eq. "Charge_Harmonizer")
+!    !
+!    ! The - sign is in the parameters C_{\eta,\beta}
+!
+!    PROVIDE aos_grad_in_r_array
+!
+!    allocate(coef_fit2(ao_num*ao_num))
+!
+!    if(mpi_master) then
+!      call ezfio_has_jastrow_j1e_coef_ao2(exists)
+!    endif 
+!    IRP_IF MPI_DEBUG
+!      print *,  irp_here, mpi_rank
+!      call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+!    IRP_ENDIF
+!    IRP_IF MPI
+!      call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!      if (ierr /= MPI_SUCCESS) then
+!        stop 'Unable to read j1e_coef_ao2 with MPI'
+!      endif
+!    IRP_ENDIF
+!    if(exists) then
+!      if(mpi_master) then
+!        write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao2 ] <<<<< ..'
+!        call ezfio_get_jastrow_j1e_coef_ao2(coef_fit2)
+!        IRP_IF MPI
+!          call MPI_BCAST(coef_fit2, ao_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+!          if (ierr /= MPI_SUCCESS) then
+!            stop 'Unable to read j1e_coef_ao2 with MPI'
+!          endif
+!        IRP_ENDIF
+!      endif
+!    else
+!
+!      call get_j1e_coef_fit_ao2(ao_num*ao_num, coef_fit2)
+!      call ezfio_set_jastrow_j1e_coef_ao2(coef_fit2)
+!
+!    endif
+!
+!    !$OMP PARALLEL                                &
+!    !$OMP DEFAULT (NONE)                          &
+!    !$OMP PRIVATE (i, j, ij, ipoint, c)           &
+!    !$OMP SHARED (n_points_final_grid, ao_num,    &
+!    !$OMP         aos_grad_in_r_array, coef_fit2, &
+!    !$OMP         aos_in_r_array, j1e_gradx, j1e_grady, j1e_gradz)
+!    !$OMP DO SCHEDULE (static)
+!    do ipoint = 1, n_points_final_grid
+!
+!      j1e_gradx(ipoint) = 0.d0
+!      j1e_grady(ipoint) = 0.d0
+!      j1e_gradz(ipoint) = 0.d0
+!
+!      do i = 1, ao_num
+!        do j = 1, ao_num
+!          ij = (i-1)*ao_num + j
+!
+!          c = coef_fit2(ij)
+!
+!          j1e_gradx(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,1) + aos_grad_in_r_array(i,ipoint,1) * aos_in_r_array(j,ipoint))
+!          j1e_grady(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,2) + aos_grad_in_r_array(i,ipoint,2) * aos_in_r_array(j,ipoint))
+!          j1e_gradz(ipoint) += c * (aos_in_r_array(i,ipoint) * aos_grad_in_r_array(j,ipoint,3) + aos_grad_in_r_array(i,ipoint,3) * aos_in_r_array(j,ipoint))
+!        enddo
+!      enddo
+!    enddo
+!    !$OMP END DO
+!    !$OMP END PARALLEL
+!
+!    deallocate(coef_fit2)
+
   elseif(j1e_type .eq. "Charge_Harmonizer_AO") then
 
-    ! \grad_1 \sum_{\eta} C_{\eta} \chi_{\eta}
+    ! \sum_{\eta} \vec{C}_{\eta} \chi_{\eta}
     ! where 
     !       \chi_{\eta} are the AOs
-    !       C_{\eta} are fitted to mimic (j1e_type .eq. "Charge_Harmonizer")
+    !       \vec{C}_{\eta} are fitted to mimic (j1e_type .eq. "Charge_Harmonizer")
     !
-    ! The - sign is in the parameters C_{\eta}
+    ! The - sign is in the parameters \vec{C}_{\eta}
 
     PROVIDE aos_grad_in_r_array
 
-    allocate(coef_fit(ao_num))
+    allocate(coef_fit3(ao_num,3))
 
     if(mpi_master) then
-      call ezfio_has_jastrow_j1e_coef_ao(exists)
+      call ezfio_has_jastrow_j1e_coef_ao3(exists)
     endif 
     IRP_IF MPI_DEBUG
       print *,  irp_here, mpi_rank
@@ -184,36 +328,35 @@ END_PROVIDER
     IRP_ENDIF
     IRP_IF MPI
       include 'mpif.h'
-      call MPI_BCAST(coef_fit, ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      call MPI_BCAST(coef_fit3, (ao_num*3), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
       if (ierr /= MPI_SUCCESS) then
-        stop 'Unable to read j1e_coef_ao with MPI'
+        stop 'Unable to read j1e_coef_ao3 with MPI'
       endif
     IRP_ENDIF
     if(exists) then
       if(mpi_master) then
-        write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao ] <<<<< ..'
-        call ezfio_get_jastrow_j1e_coef_ao(coef_fit)
+        write(6,'(A)') '.. >>>>> [ IO READ: j1e_coef_ao3 ] <<<<< ..'
+        call ezfio_get_jastrow_j1e_coef_ao3(coef_fit3)
         IRP_IF MPI
-          call MPI_BCAST(coef_fit, ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+          call MPI_BCAST(coef_fit3, (ao_num*3), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
           if (ierr /= MPI_SUCCESS) then
-            stop 'Unable to read j1e_coef_ao with MPI'
+            stop 'Unable to read j1e_coef_ao3 with MPI'
           endif
         IRP_ENDIF
       endif
     else
 
-      call get_j1e_coef_fit_ao(ao_num, coef_fit)
-      call ezfio_set_jastrow_j1e_coef_ao(coef_fit)
+      call get_j1e_coef_fit_ao3(ao_num, coef_fit3)
+      call ezfio_set_jastrow_j1e_coef_ao3(coef_fit3)
 
     endif
 
-
-    !$OMP PARALLEL                               &
-    !$OMP DEFAULT (NONE)                         &
-    !$OMP PRIVATE (i, ipoint, c)                 &
-    !$OMP SHARED (n_points_final_grid, ao_num,   &
-    !$OMP         aos_grad_in_r_array, coef_fit, &
-    !$OMP         j1e_gradx, j1e_grady, j1e_gradz)
+    !$OMP PARALLEL                                &
+    !$OMP DEFAULT (NONE)                          &
+    !$OMP PRIVATE (i, ipoint, cx, cy, cz)         &
+    !$OMP SHARED (n_points_final_grid, ao_num,    &
+    !$OMP         aos_grad_in_r_array, coef_fit3, &
+    !$OMP         aos_in_r_array, j1e_gradx, j1e_grady, j1e_gradz)
     !$OMP DO SCHEDULE (static)
     do ipoint = 1, n_points_final_grid
 
@@ -221,16 +364,19 @@ END_PROVIDER
       j1e_grady(ipoint) = 0.d0
       j1e_gradz(ipoint) = 0.d0
       do i = 1, ao_num
-        c = coef_fit(i)
-        j1e_gradx(ipoint) = j1e_gradx(ipoint) + c * aos_grad_in_r_array(i,ipoint,1)
-        j1e_grady(ipoint) = j1e_grady(ipoint) + c * aos_grad_in_r_array(i,ipoint,2)
-        j1e_gradz(ipoint) = j1e_gradz(ipoint) + c * aos_grad_in_r_array(i,ipoint,3)
+        cx = coef_fit3(i,1)
+        cy = coef_fit3(i,2)
+        cz = coef_fit3(i,3)
+
+        j1e_gradx(ipoint) += cx * aos_in_r_array(i,ipoint)
+        j1e_grady(ipoint) += cy * aos_in_r_array(i,ipoint)
+        j1e_gradz(ipoint) += cz * aos_in_r_array(i,ipoint)
       enddo
     enddo
     !$OMP END DO
     !$OMP END PARALLEL
 
-    deallocate(coef_fit)
+    deallocate(coef_fit3)
 
   else
 
diff --git a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f
index b9ea2d6f..9dc0d5b0 100644
--- a/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f
+++ b/plugins/local/non_h_ints_mu/jast_1e_utils.irp.f
@@ -107,5 +107,288 @@ end
 
 ! ---
 
+subroutine get_j1e_coef_fit_ao2(dim_fit, coef_fit)
+
+  implicit none
+  integer         , intent(in)  :: dim_fit
+  double precision, intent(out) :: coef_fit(dim_fit)
+
+  integer                       :: i, j, k, l, ipoint
+  integer                       :: ij, kl
+  double precision              :: g
+  double precision              :: t0, t1
+  double precision, allocatable :: A(:,:), b(:), A_inv(:,:)
+  double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
+  double precision, allocatable :: u1e_tmp(:)
 
 
+  PROVIDE j1e_type
+  PROVIDE int2_u2e_ao
+  PROVIDE elec_alpha_num elec_beta_num elec_num
+  PROVIDE mo_coef
+
+  call wall_time(t0)
+  print*, ' PROVIDING the representation of 1e-Jastrow in AOs x AOx ... '
+
+  ! --- --- ---
+  ! get u1e(r)
+
+  allocate(Pa(ao_num,ao_num), Pb(ao_num,ao_num), Pt(ao_num,ao_num))
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0       &
+            , mo_coef, size(mo_coef, 1), mo_coef, size(mo_coef, 1) &
+            , 0.d0, Pa, size(Pa, 1))
+
+  if(elec_alpha_num .eq. elec_beta_num) then
+    Pb = Pa
+  else
+    call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0        &
+              , mo_coef, size(mo_coef, 1), mo_coef, size(mo_coef, 1) &
+              , 0.d0, Pb, size(Pb, 1))
+  endif
+  Pt = Pa + Pb
+
+  allocate(u1e_tmp(n_points_final_grid))
+  
+  g = -0.5d0 * (dble(elec_num) - 1.d0) / dble(elec_num)
+  call dgemv("T", ao_num*ao_num, n_points_final_grid, g, int2_u2e_ao, ao_num*ao_num, Pt, 1, 0.d0, u1e_tmp, 1)
+
+  FREE int2_u2e_ao
+
+  deallocate(Pa, Pb, Pt)
+
+  ! --- --- ---
+  ! get A
+
+  allocate(A(ao_num*ao_num,ao_num*ao_num))
+
+  !$OMP PARALLEL                             &
+  !$OMP DEFAULT (NONE)                       &
+  !$OMP PRIVATE (i, j, k, l, ij, kl, ipoint) &
+  !$OMP SHARED (n_points_final_grid, ao_num, &
+  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, A)
+  !$OMP DO COLLAPSE(2)
+  do k = 1, ao_num
+    do l = 1, ao_num
+      kl = (k-1)*ao_num + l
+
+      do i = 1, ao_num
+        do j = 1, ao_num
+          ij = (i-1)*ao_num + j
+
+          A(ij,kl) = 0.d0
+          do ipoint = 1, n_points_final_grid
+            A(ij,kl) += final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) &
+                                                         * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,l)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  print *, ' A'
+  do ij = 1, ao_num*ao_num
+    write(*, '(100000(f15.7))') (A(ij,kl), kl = 1, ao_num*ao_num)
+  enddo
+
+  ! --- --- ---
+  ! get b
+
+  allocate(b(ao_num*ao_num))
+
+  !$OMP PARALLEL                             &
+  !$OMP DEFAULT (NONE)                       &
+  !$OMP PRIVATE (i, j, ij, ipoint)           &
+  !$OMP SHARED (n_points_final_grid, ao_num, &
+  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b)
+  !$OMP DO COLLAPSE(2)
+  do i = 1, ao_num
+    do j = 1, ao_num
+      ij = (i-1)*ao_num + j
+
+      b(ij) = 0.d0
+      do ipoint = 1, n_points_final_grid
+        b(ij) = b(ij) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j) * u1e_tmp(ipoint)
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(u1e_tmp)
+
+  ! --- --- ---
+  ! solve Ax = b
+
+  allocate(A_inv(ao_num*ao_num,ao_num*ao_num))
+  call get_inverse(A, ao_num*ao_num, ao_num*ao_num, A_inv, ao_num*ao_num)
+
+  integer :: mn
+  print *, ' check A_inv'
+  do ij = 1, ao_num*ao_num
+    do kl = 1, ao_num*ao_num
+
+      tmp = 0.d0
+      do mn = 1, ao_num*ao_num
+        tmp += A(ij,mn) * A_inv(mn,kl)
+      enddo
+
+      print*, ij, kl, tmp
+    enddo
+  enddo
+
+  ! coef_fit = A_inv x b
+  !call dgemv("N", ao_num*ao_num, ao_num*ao_num, 1.d0, A_inv, ao_num*ao_num, b, 1, 0.d0, coef_fit(1,1), 1)
+  do ij = 1, ao_num*ao_num
+    coef_fit(ij) = 0.d0
+    do kl = 1, ao_num*ao_num
+      coef_fit(ij) += A_inv(ij,kl) * b(kl)
+    enddo
+  enddo
+
+  double precision :: tmp
+  print *, ' check A_inv'
+  do ij = 1, ao_num*ao_num
+    tmp = 0.d0
+    do kl = 1, ao_num*ao_num
+      tmp += A(ij,kl) * coef_fit(kl)
+    enddo
+    tmp = tmp - b(ij)
+    print*, ij, tmp
+  enddo
+
+  deallocate(A)
+  deallocate(A_inv, b)
+
+  call wall_time(t1)
+  print*, ' END after (min) ', (t1-t0)/60.d0
+
+  return
+end
+
+! ---
+
+subroutine get_j1e_coef_fit_ao3(dim_fit, coef_fit)
+
+  implicit none
+  integer         , intent(in)  :: dim_fit
+  double precision, intent(out) :: coef_fit(dim_fit,3)
+
+  integer                       :: i, d, ipoint
+  double precision              :: g
+  double precision              :: t0, t1
+  double precision, allocatable :: A(:,:), b(:,:), A_inv(:,:)
+  double precision, allocatable :: Pa(:,:), Pb(:,:), Pt(:,:)
+  double precision, allocatable :: u1e_tmp(:,:)
+
+
+  PROVIDE j1e_type
+  PROVIDE int2_grad1_u2e_ao
+  PROVIDE elec_alpha_num elec_beta_num elec_num
+  PROVIDE mo_coef
+  PROVIDE ao_overlap
+
+  call wall_time(t0)
+  print*, ' PROVIDING the representation of 1e-Jastrow in AOs ... '
+
+  ! --- --- ---
+  ! get u1e(r)
+
+  allocate(Pa(ao_num,ao_num), Pb(ao_num,ao_num), Pt(ao_num,ao_num))
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0       &
+            , mo_coef, size(mo_coef, 1), mo_coef, size(mo_coef, 1) &
+            , 0.d0, Pa, size(Pa, 1))
+
+  if(elec_alpha_num .eq. elec_beta_num) then
+    Pb = Pa
+  else
+    call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0        &
+              , mo_coef, size(mo_coef, 1), mo_coef, size(mo_coef, 1) &
+              , 0.d0, Pb, size(Pb, 1))
+  endif
+  Pt = Pa + Pb
+
+  allocate(u1e_tmp(n_points_final_grid,3))
+  
+  g = -0.5d0 * (dble(elec_num) - 1.d0) / dble(elec_num)
+  do d = 1, 3
+    call dgemv("T", ao_num*ao_num, n_points_final_grid, g, int2_grad1_u2e_ao(1,1,1,d), ao_num*ao_num, Pt, 1, 0.d0, u1e_tmp(1,d), 1)
+  enddo
+
+  deallocate(Pa, Pb, Pt)
+
+  ! --- --- ---
+  ! get A & b
+
+  allocate(A(ao_num,ao_num), b(ao_num,3))
+
+  A(1:ao_num,1:ao_num) = ao_overlap(1:ao_num,1:ao_num) 
+
+  !$OMP PARALLEL                             &
+  !$OMP DEFAULT (NONE)                       &
+  !$OMP PRIVATE (i, ipoint)                  &
+  !$OMP SHARED (n_points_final_grid, ao_num, &
+  !$OMP         final_weight_at_r_vector, aos_in_r_array_transp, u1e_tmp, b)
+  !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    b(i,1) = 0.d0
+    b(i,2) = 0.d0
+    b(i,3) = 0.d0
+    do ipoint = 1, n_points_final_grid
+      b(i,1) = b(i,1) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * u1e_tmp(ipoint,1)
+      b(i,2) = b(i,2) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * u1e_tmp(ipoint,2)
+      b(i,3) = b(i,3) + final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * u1e_tmp(ipoint,3)
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  deallocate(u1e_tmp)
+
+  ! --- --- ---
+  ! solve Ax = b
+
+  allocate(A_inv(ao_num,ao_num))
+  call get_inverse(A, ao_num, ao_num, A_inv, ao_num)
+
+  ! coef_fit = A_inv x b
+  do d = 1, 3
+    call dgemv("N", ao_num, ao_num, 1.d0, A_inv, ao_num, b(1,d), 1, 0.d0, coef_fit(1,d), 1)
+  enddo
+
+  integer          :: j
+  double precision :: tmp, acc, nrm
+
+  acc = 0.d0
+  nrm = 0.d0
+  print *, ' check A_inv'
+  do d = 1, 3
+    do i = 1, ao_num
+      tmp = 0.d0
+      do j = 1, ao_num
+        tmp += ao_overlap(i,j) * coef_fit(j,d)
+      enddo
+      tmp = tmp - b(i,d)
+      if(dabs(tmp) .gt. 1d-8) then
+        print*, d, i, tmp
+      endif
+
+      acc += dabs(tmp)
+      nrm += dabs(b(i,d))
+    enddo
+  enddo
+  print *, ' Relative Error (%) =', 100.d0*acc/nrm
+
+  deallocate(A, A_inv, b)
+
+  call wall_time(t1)
+  print*, ' END after (min) ', (t1-t0)/60.d0
+
+  return
+end
+
+! ---
+
diff --git a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f
index bd7db497..b58d8c17 100644
--- a/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f
+++ b/plugins/local/non_h_ints_mu/jast_deriv_utils_vect.irp.f
@@ -1,7 +1,7 @@
 
 ! ---
 
-subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res)
+subroutine get_grad1_u12_withsq_r1_seq(ipoint, n_grid2, resx, resy, resz, res)
 
   BEGIN_DOC
   ! 
@@ -12,82 +12,93 @@ subroutine get_grad1_u12_withsq_r1_seq(r1, n_grid2, resx, resy, resz, res)
   END_DOC
 
   implicit none
-  integer,          intent(in)  :: n_grid2
-  double precision, intent(in)  :: r1(3)
+  integer,          intent(in)  :: ipoint, n_grid2
   double precision, intent(out) :: resx(n_grid2), resy(n_grid2), resz(n_grid2), res(n_grid2)
 
   integer                       :: jpoint
-  double precision              :: env_r1
-  double precision              :: grad1_env(3)
+  double precision              :: env_r1, tmp
+  double precision              :: grad1_env(3), r1(3)
   double precision, allocatable :: env_r2(:)
   double precision, allocatable :: u2b_r12(:)
   double precision, allocatable :: gradx1_u2b(:), grady1_u2b(:), gradz1_u2b(:)
   double precision, external    :: env_nucl
 
   PROVIDE j1e_type j2e_type env_type
+  PROVIDE final_grid_points
   PROVIDE final_grid_points_extra
 
-  if( ((j2e_type .eq. "Mu") .and. (env_type .eq. "None")) .or. &
-      (j2e_type .eq. "Mur") ) then
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
 
-    call grad1_j12_mu_r1_seq(r1, n_grid2, resx, resy, resz)
-    do jpoint = 1, n_points_extra_final_grid
-      res(jpoint) = resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)
-    enddo
+  if( (j2e_type .eq. "Mu")  .or. &
+      (j2e_type .eq. "Mur") .or. &
+      (j2e_type .eq. "Boys") ) then
 
-  elseif((j2e_type .eq. "Mu") .and. (env_type .ne. "None")) then
+    if(env_type .eq. "None") then
 
-    !   u(r1,r2)        = j12_mu(r12) x v(r1) x v(r2)
-    !   grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2)
+      call grad1_j12_r1_seq(r1, n_grid2, resx, resy, resz)
 
-    allocate(env_r2(n_grid2))
-    allocate(u2b_r12(n_grid2))
-    allocate(gradx1_u2b(n_grid2))
-    allocate(grady1_u2b(n_grid2))
-    allocate(gradz1_u2b(n_grid2))
+    else
 
-    env_r1 = env_nucl(r1)
-    call grad1_env_nucl(r1, grad1_env)
+      !   u(r1,r2)        = j12_mu(r12) x v(r1) x v(r2)
+      !   grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2)
 
-    call env_nucl_r1_seq(n_grid2, env_r2)
-    call j12_mu_r1_seq(r1, n_grid2, u2b_r12)
-    call grad1_j12_mu_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b)
+      allocate(env_r2(n_grid2))
+      allocate(u2b_r12(n_grid2))
+      allocate(gradx1_u2b(n_grid2))
+      allocate(grady1_u2b(n_grid2))
+      allocate(gradz1_u2b(n_grid2))
 
-    do jpoint = 1, n_points_extra_final_grid
-      resx(jpoint) = (gradx1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(1)) * env_r2(jpoint)
-      resy(jpoint) = (grady1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(2)) * env_r2(jpoint)
-      resz(jpoint) = (gradz1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(3)) * env_r2(jpoint)
-      res (jpoint) = resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)
-    enddo
+      env_r1 = env_nucl(r1)
+      call grad1_env_nucl(r1, grad1_env)
 
-    deallocate(env_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b)
+      call env_nucl_r1_seq(n_grid2, env_r2)
+      call j12_r1_seq(r1, n_grid2, u2b_r12)
+      call grad1_j12_r1_seq(r1, n_grid2, gradx1_u2b, grady1_u2b, gradz1_u2b)
+
+      do jpoint = 1, n_points_extra_final_grid
+        resx(jpoint) = (gradx1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(1)) * env_r2(jpoint)
+        resy(jpoint) = (grady1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(2)) * env_r2(jpoint)
+        resz(jpoint) = (gradz1_u2b(jpoint) * env_r1 + u2b_r12(jpoint) * grad1_env(3)) * env_r2(jpoint)
+      enddo
+
+      deallocate(env_r2, u2b_r12, gradx1_u2b, grady1_u2b, gradz1_u2b)
+
+    endif ! env_type
 
   else
 
     print *, ' Error in get_grad1_u12_withsq_r1_seq: Unknown Jastrow'
     stop
 
+  endif ! j2e_type
+
+
+  if(j1e_type .ne. "None") then
+    PROVIDE j1e_gradx j1e_grady j1e_gradz
+    PROVIDE elec_num
+    tmp = 1.d0 / (dble(elec_num) - 1.d0)
+    do jpoint = 1, n_points_extra_final_grid
+      resx(jpoint) = resx(jpoint) + tmp * j1e_gradx(ipoint)
+      resy(jpoint) = resy(jpoint) + tmp * j1e_grady(ipoint)
+      resz(jpoint) = resz(jpoint) + tmp * j1e_gradz(ipoint)
+    enddo
   endif
 
+  do jpoint = 1, n_points_extra_final_grid
+    res(jpoint) = resx(jpoint) * resx(jpoint) + resy(jpoint) * resy(jpoint) + resz(jpoint) * resz(jpoint)
+  enddo
+
   return
 end
 
 ! ---
 
-subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz)
+subroutine grad1_j12_r1_seq(r1, n_grid2, gradx, grady, gradz)
 
   BEGIN_DOC
   !
-  !  gradient of j(mu(r1,r2),r12) form of jastrow. 
-  !
-  ! if mu(r1,r2) = cst --->
-  !
-  !  d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2)
-  !
-  ! if mu(r1,r2) /= cst --->
-  !
-  ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) 
-  !                        + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2)
   !
   END_DOC
 
@@ -107,6 +118,9 @@ subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz)
 
   if(j2e_type .eq. "Mu") then
 
+    !  d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2)
+    !
+
     do jpoint = 1, n_points_extra_final_grid ! r2 
 
       r2(1) = final_grid_points_extra(1,jpoint)
@@ -134,6 +148,9 @@ subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz)
 
   elseif(j2e_type .eq. "Mur") then
 
+    ! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) 
+    !                        + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2)
+
     do jpoint = 1, n_points_extra_final_grid ! r2 
 
       r2(1) = final_grid_points_extra(1,jpoint)
@@ -166,9 +183,40 @@ subroutine grad1_j12_mu_r1_seq(r1, n_grid2, gradx, grady, gradz)
       gradz(jpoint) = gradz(jpoint) + tmp * dz
     enddo
 
+  elseif(j2e_type .eq. "Boys") then
+
+   ! j(r12) = 0.5 r12 / (1 + a_boys r_12)
+
+    PROVIDE a_boys
+
+    do jpoint = 1, n_points_extra_final_grid ! r2 
+
+      r2(1) = final_grid_points_extra(1,jpoint)
+      r2(2) = final_grid_points_extra(2,jpoint)
+      r2(3) = final_grid_points_extra(3,jpoint)
+
+      dx  = r1(1) - r2(1)
+      dy  = r1(2) - r2(2)
+      dz  = r1(3) - r2(3)
+      r12 = dsqrt(dx * dx + dy * dy + dz * dz)
+      if(r12 .lt. 1d-10) then
+        gradx(jpoint) = 0.d0 
+        grady(jpoint) = 0.d0 
+        gradz(jpoint) = 0.d0 
+        cycle
+      endif
+
+      tmp = 1.d0 + a_boys * r12
+      tmp = 0.5d0 / (r12 * tmp * tmp)
+
+      gradx(jpoint) = tmp * dx
+      grady(jpoint) = tmp * dy
+      gradz(jpoint) = tmp * dz
+    enddo
+
   else
 
-    print *, ' Error in grad1_j12_mu_r1_seq: Unknown j2e_type = ', j2e_type
+    print *, ' Error in grad1_j12_r1_seq: Unknown j2e_type = ', j2e_type
     stop
 
   endif ! j2e_type
@@ -178,7 +226,7 @@ end
 
 ! ---
 
-subroutine j12_mu_r1_seq(r1, n_grid2, res)
+subroutine j12_r1_seq(r1, n_grid2, res)
 
   include 'constants.include.F'
 
@@ -189,23 +237,57 @@ subroutine j12_mu_r1_seq(r1, n_grid2, res)
 
   integer                       :: jpoint
   double precision              :: r2(3)
+  double precision              :: dx, dy, dz
   double precision              :: mu_tmp, r12
 
   PROVIDE final_grid_points_extra
 
-  do jpoint = 1, n_points_extra_final_grid ! r2 
+  if(j2e_type .eq. "Mu") then
 
-    r2(1) = final_grid_points_extra(1,jpoint)
-    r2(2) = final_grid_points_extra(2,jpoint)
-    r2(3) = final_grid_points_extra(3,jpoint)
+    PROVIDE mu_erf
 
-    r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) &
-               + (r1(2) - r2(2)) * (r1(2) - r2(2)) &
-               + (r1(3) - r2(3)) * (r1(3) - r2(3)) )
-    mu_tmp = mu_erf * r12
+    do jpoint = 1, n_points_extra_final_grid ! r2 
+  
+      r2(1) = final_grid_points_extra(1,jpoint)
+      r2(2) = final_grid_points_extra(2,jpoint)
+      r2(3) = final_grid_points_extra(3,jpoint)
+  
+      dx  = r1(1) - r2(1)
+      dy  = r1(2) - r2(2)
+      dz  = r1(3) - r2(3)
+      r12 = dsqrt(dx * dx + dy * dy + dz * dz)
 
-    res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf
-  enddo
+      mu_tmp = mu_erf * r12
+  
+      res(jpoint) = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf
+    enddo
+
+  elseif(j2e_type .eq. "Boys") then
+
+   ! j(r12) = 0.5 r12 / (1 + a_boys r_12)
+
+    PROVIDE a_boys
+
+    do jpoint = 1, n_points_extra_final_grid ! r2 
+
+      r2(1) = final_grid_points_extra(1,jpoint)
+      r2(2) = final_grid_points_extra(2,jpoint)
+      r2(3) = final_grid_points_extra(3,jpoint)
+
+      dx  = r1(1) - r2(1)
+      dy  = r1(2) - r2(2)
+      dz  = r1(3) - r2(3)
+      r12 = dsqrt(dx * dx + dy * dy + dz * dz)
+
+      res(jpoint) = 0.5d0 * r12 / (1.d0 + a_boys * r12)
+    enddo
+
+  else
+
+    print *, ' Error in j12_r1_seq: Unknown j2e_type = ', j2e_type
+    stop
+
+  endif ! j2e_type
 
   return
 end
diff --git a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
index bc31ee91..6b6e755d 100644
--- a/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
+++ b/plugins/local/non_h_ints_mu/tc_integ_num.irp.f
@@ -6,7 +6,7 @@
 
   BEGIN_DOC
   !
-  ! int2_grad1_u12_ao_num(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  ! int2_grad1_u12_ao_num(i,j,ipoint,:) = \int dr2 [\grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
   !
   ! int2_grad1_u12_square_ao_num = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2
   !
@@ -73,10 +73,10 @@
     !$OMP DO 
     do i_blocks = 1, n_blocks
       ipoint = ii - 1 + i_blocks ! r1
-      call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) &
-                                                                                             , tmp_grad1_u12(1,i_blocks,2) &
-                                                                                             , tmp_grad1_u12(1,i_blocks,3) &
-                                                                                             , tmp_grad1_u12_squared(1,i_blocks))
+      call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_blocks,1) &
+                                                                        , tmp_grad1_u12(1,i_blocks,2) &
+                                                                        , tmp_grad1_u12(1,i_blocks,3) &
+                                                                        , tmp_grad1_u12_squared(1,i_blocks))
     enddo
     !$OMP END DO
     !$OMP END PARALLEL
@@ -109,10 +109,10 @@
     !$OMP DO 
     do i_rest = 1, n_rest
       ipoint = ii - 1 + i_rest ! r1
-      call get_grad1_u12_withsq_r1_seq(final_grid_points(1,ipoint), n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) &
-                                                                                             , tmp_grad1_u12(1,i_rest,2) &
-                                                                                             , tmp_grad1_u12(1,i_rest,3) &
-                                                                                             , tmp_grad1_u12_squared(1,i_rest))
+      call get_grad1_u12_withsq_r1_seq(ipoint, n_points_extra_final_grid, tmp_grad1_u12(1,i_rest,1) &
+                                                                        , tmp_grad1_u12(1,i_rest,2) &
+                                                                        , tmp_grad1_u12(1,i_rest,3) &
+                                                                        , tmp_grad1_u12_squared(1,i_rest))
     enddo
     !$OMP END DO
     !$OMP END PARALLEL
@@ -144,7 +144,7 @@ END_PROVIDER
 
   BEGIN_DOC
   !
-  ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  ! int2_grad1_u12_ao_num_1shot(i,j,ipoint,:) = \int dr2 [\grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
   !
   ! int2_grad1_u12_square_ao_num_1shot = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2
   !
@@ -178,9 +178,7 @@ END_PROVIDER
   !$OMP END PARALLEL
 
   do m = 1, 3
-    !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0         &
-    ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3)
-    call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0         &
+    call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid,  1.d0         &
               , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid &
               , 0.d0, int2_grad1_u12_ao_num_1shot(1,1,1,m), ao_num*ao_num) 
   enddo