diff --git a/src/Davidson/diagonalize_CI.irp.f b/src/Davidson/diagonalize_CI.irp.f
index dcf8101d..b6e57cb9 100644
--- a/src/Davidson/diagonalize_CI.irp.f
+++ b/src/Davidson/diagonalize_CI.irp.f
@@ -8,7 +8,7 @@ BEGIN_PROVIDER [ double precision, CI_energy, (N_states_diag) ]
   integer                        :: j
   character*(8)                  :: st
   call write_time(output_determinants)
-  do j=1,min(N_det,N_states_diag)
+  do j=1,min(N_det,N_states)
     CI_energy(j) = CI_electronic_energy(j) + nuclear_repulsion
     write(st,'(I4)') j
     call write_double(output_determinants,CI_energy(j),'Energy of state '//trim(st))
@@ -39,7 +39,7 @@ END_PROVIDER
    integer, allocatable           :: iorder(:)
    
    ! Guess values for the "N_states_diag" states of the CI_eigenvectors
-   do j=1,min(N_states_diag,N_det)
+   do j=1,min(N_states,N_det)
      do i=1,N_det
        CI_eigenvectors(i,j) = psi_coef(i,j)
      enddo
@@ -148,12 +148,14 @@ END_PROVIDER
       allocate(s2_eigvalues(N_states_diag), e_array(N_states_diag))
       call diagonalize_s2_betweenstates(psi_det,CI_eigenvectors,n_det,size(psi_det,3),size(CI_eigenvectors,1),min(n_states_diag,n_det),s2_eigvalues)
       
-      do j = 1, N_states_diag
+      double precision, allocatable :: psi_coef_tmp(:,:)
+      allocate(psi_coef_tmp(psi_det_size,N_states_diag))
+      do j = 1, N_states
         do i = 1, N_det
-          psi_coef(i,j) = CI_eigenvectors(i,j)
+          psi_coef_tmp(i,j) = CI_eigenvectors(i,j)
         enddo
       enddo
-      call u_0_H_u_0(e_array,psi_coef,n_det,psi_det,N_int,N_states_diag,psi_det_size)
+      call u_0_H_u_0(e_array,psi_coef_tmp,n_det,psi_det,N_int,N_states_diag,psi_det_size)
      
       ! Browsing the "n_states_diag" states and getting the lowest in energy "n_states" ones that have the S^2 value
       ! closer to the "expected_s2" set as input
@@ -172,7 +174,7 @@ END_PROVIDER
       allocate(iorder(i_state))
       do j = 1, i_state
         do i = 1, N_det
-          CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(j))
+          CI_eigenvectors(i,j) = psi_coef_tmp(i,index_good_state_array(j))
         enddo
         CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(j))
         CI_electronic_energy(j) = e_array(j)
@@ -183,7 +185,7 @@ END_PROVIDER
         CI_electronic_energy(j) = e_array(j)
         CI_eigenvectors_s2(j) = s2_eigvalues(index_good_state_array(iorder(j)))
         do i = 1, N_det
-          CI_eigenvectors(i,j) = psi_coef(i,index_good_state_array(iorder(j)))
+          CI_eigenvectors(i,j) = psi_coef_tmp(i,index_good_state_array(iorder(j)))
         enddo
       enddo
       
@@ -193,12 +195,17 @@ END_PROVIDER
         if(good_state_array(j))cycle
         i_other_state +=1
         do i = 1, N_det
-          CI_eigenvectors(i,i_state + i_other_state) = psi_coef(i,j)
+          CI_eigenvectors(i,i_state + i_other_state) = psi_coef_tmp(i,j)
         enddo
         CI_eigenvectors_s2(i_state + i_other_state) = s2_eigvalues(j)
         CI_electronic_energy(i_state + i_other_state) = e_array(i_state + i_other_state)
       enddo
-      deallocate(iorder,e_array,index_good_state_array,good_state_array)
+      do j=1,N_states
+        do i=1,N_det
+          psi_coef(i,j) = psi_coef_tmp(i,j)
+        enddo
+      enddo
+      deallocate(iorder,e_array,index_good_state_array,good_state_array,psi_coef_tmp)
        
      deallocate(s2_eigvalues)
      
@@ -213,7 +220,7 @@ subroutine diagonalize_CI
 !  eigenstates of the CI matrix
   END_DOC
   integer :: i,j
-  do j=1,N_states_diag
+  do j=1,N_states
     do i=1,N_det
       psi_coef(i,j) = CI_eigenvectors(i,j)
     enddo
diff --git a/src/Davidson/parameters.irp.f b/src/Davidson/parameters.irp.f
index a78ff7f1..d9c82e3c 100644
--- a/src/Davidson/parameters.irp.f
+++ b/src/Davidson/parameters.irp.f
@@ -12,8 +12,7 @@ BEGIN_PROVIDER [ integer, davidson_sze_max ]
   ! Max number of Davidson sizes
   END_DOC
   ASSERT (davidson_sze_max <= davidson_iter_max)
-  davidson_sze_max = max(8,2*N_states_diag)
-  davidson_sze_max = 3
+  davidson_sze_max = 8*N_states
 END_PROVIDER
 
 
diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f
index 0e62d524..1f2863eb 100644
--- a/src/Determinants/determinants.irp.f
+++ b/src/Determinants/determinants.irp.f
@@ -242,7 +242,7 @@ END_PROVIDER
 END_PROVIDER 
 
 
-BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states_diag) ]
+BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states) ]
   implicit none
   BEGIN_DOC
   ! The wave function coefficients. Initialized with Hartree-Fock if the EZFIO file
@@ -255,7 +255,7 @@ BEGIN_PROVIDER [ double precision, psi_coef, (psi_det_size,N_states_diag) ]
   character*(64)                 :: label
 
   psi_coef = 0.d0
-  do i=1,N_states_diag
+  do i=1,N_states
     psi_coef(i,i) = 1.d0
   enddo
 
diff --git a/src/Determinants/s2.irp.f b/src/Determinants/s2.irp.f
index 8db1ff17..78b2d1e7 100644
--- a/src/Determinants/s2.irp.f
+++ b/src/Determinants/s2.irp.f
@@ -300,123 +300,120 @@ subroutine get_uJ_s2_uI(psi_keys_tmp,psi_coefs_tmp,n,nmax_coefs,nmax_keys,s2,nst
 end
 
 subroutine diagonalize_s2_betweenstates(keys_tmp,u_0,n,nmax_keys,nmax_coefs,nstates,s2_eigvalues)
- BEGIN_DOC
-! You enter with nstates vectors in u_0 that may be coupled by S^2
-! The subroutine diagonalize the S^2 operator in the basis of these states. 
-! The vectors that you obtain in output are no more coupled by S^2, 
-! which does not necessary mean that they are eigenfunction of S^2. 
-! n,nmax,nstates = number of determinants, physical dimension of the arrays and number of states
-! keys_tmp = array of integer(bit_kind) that represents the determinants 
-! psi_coefs(i,j) = coeff of the ith determinant in the jth state
-! VECTORS ARE SUPPOSED TO BE ORTHONORMAL IN INPUT
- END_DOC
- implicit none
- use bitmasks
- integer, intent(in) :: n,nmax_keys,nmax_coefs,nstates
- integer(bit_kind), intent(in) :: keys_tmp(N_int,2,nmax_keys)
- double precision, intent(inout) :: u_0(nmax_coefs,nstates)
- double precision, intent(out)   :: s2_eigvalues(nstates)
-
-
- double precision,allocatable :: s2(:,:),overlap(:,:)
- double precision, allocatable :: eigvalues(:),eigvectors(:,:)
- integer :: i,j,k
- double precision, allocatable :: psi_coefs_tmp(:,:)
- double precision :: accu,coef_contract
- double precision :: u_dot_u,u_dot_v
-
- print*,''
- print*,'*********************************************************************'
- print*,'Cleaning the various vectors by diagonalization of the S^2 matrix ...'
- print*,''
- print*,'nstates = ',nstates
- allocate(s2(nstates,nstates),overlap(nstates,nstates))
- !$OMP PARALLEL DO COLLAPSE(2) DEFAULT(NONE) SCHEDULE(dynamic) &
-     !$OMP  PRIVATE(i,j) SHARED(overlap,u_0,nstates,n)
- do i = 1, nstates
-   do j = 1, nstates
-     if (i < j) then
-        cycle
-     else if (i == j) then
-       overlap(i,i) = u_dot_u(u_0(1,i),n)
-     else
-       overlap(i,j) = u_dot_v(u_0(1,j),u_0(1,i),n)
-       overlap(j,i) = overlap(i,j)
-     endif
-   enddo
- enddo
- !$OMP END PARALLEL DO
- call ortho_lowdin(overlap,size(overlap,1),nstates,u_0,size(u_0,1),n)
-
- double precision, allocatable :: v_0(:,:)
- allocate ( v_0(size(u_0,1),nstates) )
- call S2_u_0_nstates(v_0,u_0,n,keys_tmp,N_int,nstates,size(u_0,1))
-      
- do i=1, nstates
-  do j=1,i
-    s2(j,i) = u_dot_v(u_0(1,i), v_0(1,j),n)
-    s2(i,j) = s2(j,i) 
+  BEGIN_DOC
+  ! You enter with nstates vectors in u_0 that may be coupled by S^2
+  ! The subroutine diagonalize the S^2 operator in the basis of these states.
+  ! The vectors that you obtain in output are no more coupled by S^2,
+  ! which does not necessary mean that they are eigenfunction of S^2.
+  ! n,nmax,nstates = number of determinants, physical dimension of the arrays and number of states
+  ! keys_tmp = array of integer(bit_kind) that represents the determinants
+  ! psi_coefs(i,j) = coeff of the ith determinant in the jth state
+  ! VECTORS ARE SUPPOSED TO BE ORTHONORMAL IN INPUT
+  END_DOC
+  implicit none
+  use bitmasks
+  integer, intent(in)            :: n,nmax_keys,nmax_coefs,nstates
+  integer(bit_kind), intent(in)  :: keys_tmp(N_int,2,nmax_keys)
+  double precision, intent(inout) :: u_0(nmax_coefs,nstates)
+  double precision, intent(out)  :: s2_eigvalues(nstates)
+  
+  
+  double precision,allocatable   :: s2(:,:),overlap(:,:)
+  double precision, allocatable  :: eigvectors(:,:,:)
+  integer                        :: i,j,k
+  double precision, allocatable  :: psi_coefs_tmp(:,:)
+  double precision               :: accu,coef_contract
+  double precision               :: u_dot_u,u_dot_v
+  
+  print*,''
+  print*,'*********************************************************************'
+  print*,'Cleaning the various vectors by diagonalization of the S^2 matrix ...'
+  print*,''
+  print*,'nstates = ',nstates
+  allocate(s2(nstates,nstates),overlap(nstates,nstates))
+  overlap = 0.d0
+  call dgemm('T','N',nstates,nstates,n, 1.d0, u_0, size(u_0,1),      &
+      u_0, size(u_0,1), 0.d0, overlap, size(overlap,1))
+  call ortho_lowdin(overlap,size(overlap,1),nstates,u_0,size(u_0,1),n)
+  
+  double precision, allocatable  :: v_0(:,:)
+  allocate ( v_0(size(u_0,1),nstates) )
+  call S2_u_0_nstates(v_0,u_0,n,keys_tmp,N_int,nstates,size(u_0,1))
+  
+  call dgemm('T','N',nstates,nstates,n, 1.d0, u_0, size(u_0,1),      &
+      v_0, size(v_0,1), 0.d0, s2, size(s2,1))
+  
+  print*,'S^2 matrix in the basis of the states considered'
+  do i = 1, nstates
+    write(*,'(100(F5.2,X))')s2(i,:)
   enddo
- enddo
-
-! call get_uJ_s2_uI(keys_tmp,u_0,n_det,size(u_0,1),size(keys_tmp,3),s2,nstates)
- print*,'S^2 matrix in the basis of the states considered'
- do i = 1, nstates
-  write(*,'(10(F10.6,X))')s2(i,:)
- enddo
-
- double precision :: accu_precision_diag,accu_precision_of_diag
- accu_precision_diag = 0.d0
- accu_precision_of_diag = 0.d0
- do i = 1, nstates
-  ! Do not combine states of the same spin symmetry
-  do j = i+1, nstates
-   if( dabs(s2(i,i) - s2(j,j)) .le.0.5d0) then
-    s2(i,j) = 0.d0
-    s2(j,i) = 0.d0
-   endif
+  
+  double precision               :: accu_precision_diag,accu_precision_of_diag
+  accu_precision_diag = 0.d0
+  accu_precision_of_diag = 0.d0
+  do i = 1, nstates
+    ! Do not combine states of the same spin symmetry
+    do j = i+1, nstates
+      if( dabs(s2(i,i) - s2(j,j)) .le.0.5d0) then
+        s2(i,j) = 0.d0
+        s2(j,i) = 0.d0
+      endif
+    enddo
+    ! Do not rotate if the diagonal is correct
+    if( dabs(s2(i,i) - expected_s2).le.5.d-3) then
+      do j = 1, nstates
+        if (j==i) cycle
+        s2(i,j) = 0.d0
+        s2(j,i) = 0.d0
+      enddo
+    endif
   enddo
-  ! Do not rotate if the diagonal is correct
-  if( dabs(s2(i,i) - expected_s2).le.5.d-3) then
-   do j = 1, nstates
-    if (j==i) cycle
-    s2(i,j) = 0.d0
-    s2(j,i) = 0.d0
-   enddo
-  endif
- enddo
-
- print*,'Modified S^2 matrix that will be diagonalized'
- do i = 1, nstates
-  write(*,'(10(F10.6,X))')s2(i,:)
-  s2(i,i) = s2(i,i) 
- enddo
-
- allocate(eigvectors(nstates,nstates))
- call lapack_diagd(s2_eigvalues,eigvectors,s2,nstates,nstates)
- print*,'Eigenvalues'
- do i = 1, nstates
-  print*,'s2 = ',s2_eigvalues(i)
- enddo
-
- allocate(psi_coefs_tmp(nmax_coefs,nstates))
- psi_coefs_tmp = 0.d0
- do j = 1, nstates
-  do k = 1, nstates
-   coef_contract =  eigvectors(k,j)    !  <phi_k|Psi_j>
-   do i = 1, n_det
-    psi_coefs_tmp(i,j) += u_0(i,k) * coef_contract
-   enddo
+  
+  print*,'Modified S^2 matrix that will be diagonalized'
+  do i = 1, nstates
+    write(*,'(10(F5.2,X))')s2(i,:)
+    s2(i,i) = s2(i,i)
   enddo
- enddo
- do j = 1, nstates
-   accu = 1.d0/u_dot_u(psi_coefs_tmp(1,j),n_det)
-   do i = 1, n_det
-    u_0(i,j) = psi_coefs_tmp(i,j) * accu
-   enddo
- enddo
+  
+  allocate(eigvectors(nstates,nstates,2))
+!  call svd(s2, size(s2,1), eigvectors, size(eigvectors,1), s2_eigvalues,&
+!      eigvectors(1,1,2), size(eigvectors,1), nstates, nstates)
 
- deallocate(s2,v_0,eigvectors,psi_coefs_tmp,overlap)
+  call lapack_diagd(s2_eigvalues,eigvectors,s2,nstates,nstates)
+  print*,'Eigenvalues'
+  double precision :: t(nstates), iorder(nstates)
+  do i = 1, nstates
+    t(i) = dabs(s2_eigvalues(i))
+    iorder(i) = i
+  enddo
+  call dsort(t,iorder,nstates)
 
+  do i = 1, nstates
+    s2_eigvalues(i) = t(i)
+    do j=1,nstates
+      eigvectors(j,i,2) = eigvectors(j,iorder(i),1)
+    enddo
+    print*,'s2 = ',s2_eigvalues(i)
+  enddo
+  
+  allocate(psi_coefs_tmp(nmax_coefs,nstates))
+  psi_coefs_tmp = 0.d0
+  do j = 1, nstates
+    do k = 1, nstates
+      coef_contract =  eigvectors(k,j,2)    !  <phi_k|Psi_j>
+      do i = 1, n_det
+        psi_coefs_tmp(i,j) += u_0(i,k) * coef_contract
+      enddo
+    enddo
+  enddo
+  do j = 1, nstates
+    accu = 1.d0/u_dot_u(psi_coefs_tmp(1,j),n_det)
+    do i = 1, n_det
+      u_0(i,j) = psi_coefs_tmp(i,j) * accu
+    enddo
+  enddo
+  
+  deallocate(s2,v_0,eigvectors,psi_coefs_tmp,overlap )
+  
 end
 
diff --git a/src/Utils/util.irp.f b/src/Utils/util.irp.f
index 91a61a43..751610a5 100644
--- a/src/Utils/util.irp.f
+++ b/src/Utils/util.irp.f
@@ -303,23 +303,10 @@ double precision function u_dot_v(u,v,sze)
   END_DOC
   integer, intent(in)            :: sze
   double precision, intent(in)   :: u(sze),v(sze)
+  double precision, external     :: ddot
   
-  integer                        :: i,t1, t2, t3, t4
-  
-  ASSERT (sze > 0)
-  t1 = 0
-  t2 = sze/4
-  t3 = t2+t2
-  t4 = t3+t2
-  u_dot_v = 0.d0
-  !DIR$ VECTOR ALWAYS
-  do i=1,t2
-    u_dot_v = u_dot_v + u(t1+i)*v(t1+i) + u(t2+i)*v(t2+i) +          &
-        u(t3+i)*v(t3+i) + u(t4+i)*v(t4+i)
-  enddo
-  do i=t4+t2+1,sze
-    u_dot_v = u_dot_v + u(i)*v(i)
-  enddo
+  !DIR$ FORCEINLINE
+  u_dot_v = ddot(sze,u,1,v,1)
   
 end
 
@@ -330,28 +317,10 @@ double precision function u_dot_u(u,sze)
   END_DOC
   integer, intent(in)            :: sze
   double precision, intent(in)   :: u(sze)
+  double precision, external     :: ddot
   
-  integer                        :: i
-  integer                        :: t1, t2, t3, t4
-  
-  ASSERT (sze > 0)
-  t1 = 0
-  t2 = sze/4
-  t3 = t2+t2
-  t4 = t3+t2
-  u_dot_u = 0.d0
-! do i=1,t2
-!   u_dot_u = u_dot_u + u(t1+i)*u(t1+i) + u(t2+i)*u(t2+i) +          &
-!       u(t3+i)*u(t3+i) + u(t4+i)*u(t4+i)
-! enddo
-! do i=t4+t2+1,sze
-!   u_dot_u = u_dot_u+u(i)*u(i)
-! enddo
-  
-  !DIR$ VECTOR ALWAYS
-  do i=1,sze
-    u_dot_u = u_dot_u + u(i)*u(i)
-  enddo
+  !DIR$ FORCEINLINE
+  u_dot_u = ddot(sze,u,1,u,1)
   
 end
 
@@ -364,18 +333,17 @@ subroutine normalize(u,sze)
   integer, intent(in)            :: sze
   double precision, intent(inout):: u(sze)
   double precision               :: d
-  double precision, external     :: u_dot_u
+  double precision, external     :: dnrm2
   integer                        :: i
   
   !DIR$ FORCEINLINE
-  d = u_dot_u(u,sze)
+  d = dnrm2(sze,u,1)
   if (d /= 0.d0) then
-    d = 1.d0/dsqrt( d )
+    d = 1.d0/d
   endif
   if (d /= 1.d0) then
-    do i=1,sze
-      u(i) = d*u(i)
-    enddo
+    !DIR$ FORCEINLINE
+    call dscal(sze,d,u,1)
   endif
 end